diff --git a/markov.py b/markov.py index ddb69c7..99a3f67 100644 --- a/markov.py +++ b/markov.py @@ -38,7 +38,12 @@ for infile in args.input: with open(infile, mode='r') as f: input_text = f.read() - doc = nlp(input_text) + i = 1000000 + if len(input_text) > i: + while input_text[i] != ' ': + i -= 1 + + doc = nlp(input_text[:i]) for sent in doc.sents: cnt = 0 @@ -101,7 +106,7 @@ for key in transitions.keys(): probs.append(transitions[key]['to'][choice] / cnt) chain[key] = { 'choices': choices, 'probs': probs} -sents = [] +sents = [] for _ in range(10): choice = 'START'