Browse Source

Add syntactic dependency and word position to state

master
Blink The Things 4 years ago
parent
commit
d258640ab3
1 changed file with 20 additions and 3 deletions
  1. +20
    -3
      markov.py

+ 20
- 3
markov.py View File

@@ -41,18 +41,30 @@ words_doc = nlp(words_text)
words = {} words = {}
for sent in words_doc.sents: for sent in words_doc.sents:
cnt = 0
for token in sent: for token in sent:
if token.pos_ in ('SPACE', 'PUNCT', 'X'): if token.pos_ in ('SPACE', 'PUNCT', 'X'):
continue continue
state = token.tag_
cnt += 1
word = token.text word = token.text
state = f'{token.tag_},{token.dep_}'
if state in words: if state in words:
words[state].append(word) words[state].append(word)
else: else:
words[state] = [word] words[state] = [word]
state = f'{token.tag_},{token.dep_},{str(cnt)}'
if state in words:
words[state].append(word)
else:
words[state] = [word]
pos_text = '' pos_text = ''
with open(args.pos_file, mode='r') as f: with open(args.pos_file, mode='r') as f:
pos_text = f.read() pos_text = f.read()
@@ -63,11 +75,13 @@ edges = []
for sent in pos_doc.sents: for sent in pos_doc.sents:
curr_state = 'START' curr_state = 'START'
cnt = 0
for token in sent: for token in sent:
if token.pos_ in ('SPACE', 'PUNCT', 'X'): if token.pos_ in ('SPACE', 'PUNCT', 'X'):
continue continue
next_state = token.tag_
cnt += 1
next_state = f'{token.tag_},{token.dep_},{str(cnt)}'
edges.append((curr_state, next_state)) edges.append((curr_state, next_state))
@@ -110,7 +124,10 @@ for _ in range(10):
sents.append(' '.join(choices)) sents.append(' '.join(choices))
break break
word = rng.choice(words[next_choice])
try:
word = rng.choice(words[next_choice])
except KeyError:
word = rng.choice(words[','.join(next_choice.split(',')[:-1])])
choices.append(word) choices.append(word)


Loading…
Cancel
Save