Add syntactic dependency and word position to state

4 years ago · d258640ab3
--- a/markov.py
+++ b/markov.py
@@ -41,18 +41,30 @@ words_doc = nlp(words_text)

 # Lookup table: state key -> list of token texts observed under that key.
 words = {}
 for sent in words_doc.sents:
    # 1-based position among the tokens of this sentence that survive the
    # filter below; reset for every sentence.
    cnt = 0
    for token in sent:
        # Skip tokens whose pos_ is SPACE, PUNCT or X; they are neither
        # counted nor stored.
        if token.pos_ in ('SPACE', 'PUNCT', 'X'):
            continue

        # NOTE(review): this assignment is overwritten below before it is
        # ever read -- presumably the pre-change line of the diff; confirm.
        state = token.tag_
        cnt += 1

        word = token.text

        # First key: "<tag_>,<dep_>" (no position).
        state = f'{token.tag_},{token.dep_}'

        if state in words:
            words[state].append(word)
        else:
            words[state] = [word]

        # Second key: "<tag_>,<dep_>,<position>"; the same word is filed
        # under both keys.
        state = f'{token.tag_},{token.dep_},{str(cnt)}'

        if state in words:
            words[state].append(word)
        else:
            words[state] = [word]


 pos_text = ''
 with open(args.pos_file, mode='r') as f:
    pos_text = f.read()
@@ -63,11 +75,13 @@ edges = []
 # Collect state transitions sentence by sentence.
 for sent in pos_doc.sents:
    # Each sentence starts from the literal 'START' state.
    curr_state = 'START'

    # 1-based position among the tokens of this sentence that pass the
    # filter below; reset for every sentence.
    cnt = 0
    for token in sent:
        # Skip tokens whose pos_ is SPACE, PUNCT or X.
        if token.pos_ in ('SPACE', 'PUNCT', 'X'):
            continue

        # NOTE(review): this assignment is overwritten two lines down before
        # it is read -- presumably the pre-change line of the diff; confirm.
        next_state = token.tag_
        cnt += 1
        # State key has the form "<tag_>,<dep_>,<position>".
        next_state = f'{token.tag_},{token.dep_},{str(cnt)}'

        # Record the transition as a (from_state, to_state) pair.
        edges.append((curr_state, next_state))

@@ -110,7 +124,10 @@ for _ in range(10):
            sents.append(' '.join(choices))
            break

        # NOTE(review): unguarded lookup immediately before the guarded one --
        # presumably the pre-change line of the diff. If it is really present,
        # a missing key raises KeyError here and the fallback below never runs.
        word = rng.choice(words[next_choice])
        try:
            # Pick a random word filed under the full state key.
            word = rng.choice(words[next_choice])
        except KeyError:
            # No entry for the full key: retry with the key minus its last
            # comma-separated field.
            word = rng.choice(words[','.join(next_choice.split(',')[:-1])])

        choices.append(word)