From 84e8dd667142cca9587afa6d3fac49306e1bbd91 Mon Sep 17 00:00:00 2001
From: Blink The Things
Date: Fri, 6 Nov 2020 13:29:20 -0500
Subject: [PATCH] Truncate large text files

---
Review notes (text between "---" and the diffstat is ignored by `git am`):
- Line breaks, indentation and blank context lines are inferred from the
  hunk line counts and Python syntax -- TODO confirm against markov.py.
- The truncation now uses str.rfind instead of stepping `i` backwards one
  character at a time. The old loop had no lower bound: with no space in
  the first 1000001 characters, `i` went negative, wrapped around to the
  end of the string, and either raised IndexError or produced a slice
  longer than the limit.
- Diffstat and hunk headers are updated for the new line counts; the blob
  ids on the "index" line still describe the original change.
- The `sents = []` hunk is a whitespace-only change; its exact whitespace
  is not visible here -- TODO confirm.

 markov.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/markov.py b/markov.py
index ddb69c7..99a3f67 100644
--- a/markov.py
+++ b/markov.py
@@ -38,7 +38,15 @@ for infile in args.input:
     with open(infile, mode='r') as f:
         input_text = f.read()
 
-    doc = nlp(input_text)
+    limit = 1000000
+    if len(input_text) > limit:
+        # Cut at the last space at or before the limit so no word is split.
+        # If there is no usable space, keep the hard cut at `limit`.
+        cut = input_text.rfind(' ', 0, limit + 1)
+        if cut > 0:
+            limit = cut
+
+    doc = nlp(input_text[:limit])
 
     for sent in doc.sents:
         cnt = 0
@@ -101,7 +109,7 @@ for key in transitions.keys():
         probs.append(transitions[key]['to'][choice] / cnt)
     chain[key] = { 'choices': choices, 'probs': probs}
 
-sents = []
+sents = []
 
 for _ in range(10):
     choice = 'START'