|
@ -20,17 +20,19 @@ import argparse |
|
|
import numpy as np |
|
|
import numpy as np |
|
|
import os |
|
|
import os |
|
|
import spacy |
|
|
import spacy |
|
|
|
|
|
import sys |
|
|
|
|
|
from textwrap import fill |
|
|
|
|
|
|
|
|
parser = argparse.ArgumentParser(description='Generate a novel using Markov chains.') |
|
|
parser = argparse.ArgumentParser(description='Generate a novel using Markov chains.') |
|
|
parser.add_argument('input', nargs='+', help='used to construct Markov transition matrix') |
|
|
parser.add_argument('input', nargs='+', help='used to construct Markov transition matrix') |
|
|
parser.add_argument('-c','--count', type=int, help='generate at least COUNT words') |
|
|
|
|
|
|
|
|
parser.add_argument('-w','--words', type=int, help='generate at least WORDS words') |
|
|
parser.add_argument('-s', '--seed', type=int, help='seed for random number generator') |
|
|
parser.add_argument('-s', '--seed', type=int, help='seed for random number generator') |
|
|
args = parser.parse_args() |
|
|
args = parser.parse_args() |
|
|
|
|
|
|
|
|
nlp = spacy.load('en_core_web_sm') |
|
|
nlp = spacy.load('en_core_web_sm') |
|
|
|
|
|
|
|
|
rng = np.random.default_rng(args.seed or 12345) |
|
|
rng = np.random.default_rng(args.seed or 12345) |
|
|
word_cnt = args.count or 100 |
|
|
|
|
|
|
|
|
word_cnt = args.words or 100 |
|
|
|
|
|
|
|
|
words = {} |
|
|
words = {} |
|
|
edges = [] |
|
|
edges = [] |
|
@ -109,12 +111,12 @@ for key in transitions.keys(): |
|
|
chain[key] = { 'choices': choices, 'probs': probs} |
|
|
chain[key] = { 'choices': choices, 'probs': probs} |
|
|
|
|
|
|
|
|
sents = [] |
|
|
sents = [] |
|
|
|
|
|
paragraphs = [] |
|
|
paragraph_sent_cnt = rng.integers(5, 10) |
|
|
paragraph_sent_cnt = rng.integers(5, 10) |
|
|
while word_cnt > 0: |
|
|
while word_cnt > 0: |
|
|
choice = 'START' |
|
|
choice = 'START' |
|
|
|
|
|
|
|
|
choices = [] |
|
|
choices = [] |
|
|
sent_word_cnt = 0 |
|
|
|
|
|
while True: |
|
|
while True: |
|
|
next_choice = rng.choice(chain[choice]['choices'], p=chain[choice]['probs']) |
|
|
next_choice = rng.choice(chain[choice]['choices'], p=chain[choice]['probs']) |
|
|
|
|
|
|
|
@ -126,14 +128,13 @@ while word_cnt > 0: |
|
|
.replace(" '", "'") |
|
|
.replace(" '", "'") |
|
|
.replace(" ’", "’") |
|
|
.replace(" ’", "’") |
|
|
.replace(" `", "`") |
|
|
.replace(" `", "`") |
|
|
+ '.' |
|
|
|
|
|
|
|
|
+ '. ' |
|
|
) |
|
|
) |
|
|
|
|
|
|
|
|
word_cnt -= sent_word_cnt |
|
|
|
|
|
paragraph_sent_cnt -= 1 |
|
|
paragraph_sent_cnt -= 1 |
|
|
if paragraph_sent_cnt < 0: |
|
|
if paragraph_sent_cnt < 0: |
|
|
sents.append(os.linesep) |
|
|
|
|
|
sents.append(os.linesep) |
|
|
|
|
|
|
|
|
paragraphs.append(fill(''.join(sents), replace_whitespace=False, drop_whitespace=False)) |
|
|
|
|
|
sents = [] |
|
|
paragraph_sent_cnt = rng.integers(5, 10) |
|
|
paragraph_sent_cnt = rng.integers(5, 10) |
|
|
break |
|
|
break |
|
|
|
|
|
|
|
@ -148,8 +149,9 @@ while word_cnt > 0: |
|
|
word = str.lower(word) |
|
|
word = str.lower(word) |
|
|
|
|
|
|
|
|
choices.append(word) |
|
|
choices.append(word) |
|
|
sent_word_cnt += 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
word_cnt -= 1 |
|
|
|
|
|
|
|
|
choice = next_choice |
|
|
choice = next_choice |
|
|
|
|
|
|
|
|
print(' '.join(sents)) |
|
|
|
|
|
|
|
|
print(f'{os.linesep}{os.linesep}'.join(paragraphs)) |