Browse Source

Change the way input files are used

master
Blink The Things 4 years ago
parent
commit
dc1cf4bdaa
1 changed file with 36 additions and 44 deletions
  1. +36
    -44
      markov.py

+ 36
- 44
markov.py View File

@@ -22,72 +22,64 @@ import os
import spacy
parser = argparse.ArgumentParser(description='Generate a novel using Markov chains.')
parser.add_argument('word_file', help='file used for word selection')
parser.add_argument('pos_file', help='file used to build part-of-speech Markov chain')
parser.add_argument('input', nargs='+', help='used to construct Markov transition matrix')
parser.add_argument('-s', '--seed', type=int, help='seed for random number generator')
args = parser.parse_args()
nlp = spacy.load('en_core_web_sm')
seed = args.seed or 12345
rng = np.random.default_rng(seed)
words_text = ''
with open(args.word_file, mode='r') as f:
words_text = f.read()
words_doc = nlp(words_text)
rng = np.random.default_rng(args.seed or 12345)
words = {}
for sent in words_doc.sents:
cnt = 0
for token in sent:
if token.pos_ in ('SPACE', 'PUNCT', 'X'):
continue
edges = []
cnt += 1
input_text = ''
for infile in args.input:
with open(infile, mode='r') as f:
input_text = f.read()
word = token.text
doc = nlp(input_text)
state = f'{token.tag_},{token.dep_}'
for sent in doc.sents:
cnt = 0
for token in sent:
if token.pos_ in ('SPACE', 'PUNCT', 'X'):
continue
if state in words:
words[state].append(word)
else:
words[state] = [word]
cnt += 1
state = f'{token.tag_},{token.dep_},{str(cnt)}'
word = token.text
if state in words:
words[state].append(word)
else:
words[state] = [word]
state = f'{token.tag_},{token.dep_}'
if state in words:
words[state].append(word)
else:
words[state] = [word]
pos_text = ''
with open(args.pos_file, mode='r') as f:
pos_text = f.read()
state = f'{token.tag_},{token.dep_},{str(cnt)}'
pos_doc = nlp(pos_text)
if state in words:
words[state].append(word)
else:
words[state] = [word]
edges = []
for sent in pos_doc.sents:
curr_state = 'START'
for sent in doc.sents:
curr_state = 'START'
cnt = 0
for token in sent:
if token.pos_ in ('SPACE', 'PUNCT', 'X'):
continue
cnt = 0
for token in sent:
if token.pos_ in ('SPACE', 'PUNCT', 'X'):
continue
cnt += 1
next_state = f'{token.tag_},{token.dep_},{str(cnt)}'
cnt += 1
next_state = f'{token.tag_},{token.dep_},{str(cnt)}'
edges.append((curr_state, next_state))
edges.append((curr_state, next_state))
curr_state = next_state
curr_state = next_state
edges.append((curr_state, 'STOP'))
edges.append((curr_state, 'STOP'))
transitions = {}
for edge in edges:


Loading…
Cancel
Save