|
|
@ -41,18 +41,30 @@ words_doc = nlp(words_text) |
|
|
|
|
|
|
|
# Index the text of every kept token of words_doc under two keys, so a word
# can later be looked up by exact slot or by the looser key without position:
#   'TAG,DEP'    - token.tag_ and token.dep_
#   'TAG,DEP,N'  - the same, plus the token's 1-based position N among the
#                  kept tokens of its sentence
# words stays a plain dict (not a defaultdict) so that a lookup of an unknown
# key still raises KeyError.
words = {}
for sent in words_doc.sents:
    cnt = 0
    for token in sent:
        # Whitespace, punctuation and 'X' tokens are not indexed and do not
        # advance the position counter.
        if token.pos_ in ('SPACE', 'PUNCT', 'X'):
            continue
        cnt += 1
        word = token.text
        role = f'{token.tag_},{token.dep_}'
        # Register the word under the position-free key first, then under
        # the positional key.
        for state in (role, f'{role},{cnt}'):
            words.setdefault(state, []).append(word)
|
|
|
|
|
|
|
|
|
|
|
# Read the whole file named by args.pos_file into pos_text (text mode, which
# is open()'s default).
with open(args.pos_file) as f:
    pos_text = f.read()
|
|
@ -63,11 +75,13 @@ edges = [] |
|
|
|
# Record one (from_state, to_state) edge per kept token of every sentence in
# pos_doc.  A target state has the form 'TAG,DEP,N', where N is the token's
# 1-based position among the kept tokens of its sentence.
for sent in pos_doc.sents:
    curr_state = 'START'
    cnt = 0
    for token in sent:
        # Whitespace, punctuation and 'X' tokens produce no edge and do not
        # advance the position counter.
        if token.pos_ in ('SPACE', 'PUNCT', 'X'):
            continue
        cnt += 1
        next_state = f'{token.tag_},{token.dep_},{cnt}'
        # NOTE(review): nothing visible in this excerpt advances curr_state;
        # the rest of the loop body presumably follows this line -- confirm.
        edges.append((curr_state, next_state))
|
|
|
|
|
|
@ -110,7 +124,10 @@ for _ in range(10): |
|
|
|
sents.append(' '.join(choices)) |
|
|
|
break |
|
|
|
|
|
|
|
# Pick a word for the chosen state.  The lookup must happen only inside the
# try: an unguarded words[next_choice] would raise KeyError before the
# fallback below could ever run.
try:
    word = rng.choice(words[next_choice])
except KeyError:
    # No word is stored under the full key: retry with everything before the
    # last comma (presumably 'TAG,DEP' once the trailing position field is
    # dropped -- confirm against how next_choice is built).  A key still
    # missing here raises KeyError to the caller, as before.
    word = rng.choice(words[next_choice.rpartition(',')[0]])
choices.append(word)
|
|
|
|
|
|
|