Browse Source

Wrap output text

master
Blink The Things 4 years ago
parent
commit
6730c37aec
3 changed files with 4304 additions and 693 deletions
  1. +1
    -1
      example/build.sh
  2. +4292
    -683
      example/novel.txt
  3. +11
    -9
      markov.py

+ 1
- 1
example/build.sh View File

@@ -1 +1 @@
- python ../markov.py -s 1000 -c 50000 ./inputs/*.txt > novel.txt
+ python ../markov.py -s 1000 -w 50000 ./inputs/*.txt > novel.txt

+ 4292
- 683
example/novel.txt
File diff suppressed because it is too large
View File


+ 11
- 9
markov.py View File

@@ -20,17 +20,19 @@ import argparse
  import numpy as np
  import os
  import spacy
+ import sys
+ from textwrap import fill
  parser = argparse.ArgumentParser(description='Generate a novel using Markov chains.')
  parser.add_argument('input', nargs='+', help='used to construct Markov transition matrix')
- parser.add_argument('-c','--count', type=int, help='generate at least COUNT words')
+ parser.add_argument('-w','--words', type=int, help='generate at least WORDS words')
  parser.add_argument('-s', '--seed', type=int, help='seed for random number generator')
  args = parser.parse_args()
  nlp = spacy.load('en_core_web_sm')
  rng = np.random.default_rng(args.seed or 12345)
- word_cnt = args.count or 100
+ word_cnt = args.words or 100
  words = {}
  edges = []
@@ -109,12 +111,12 @@ for key in transitions.keys():
  chain[key] = { 'choices': choices, 'probs': probs}
  sents = []
+ paragraphs = []
  paragraph_sent_cnt = rng.integers(5, 10)
  while word_cnt > 0:
  choice = 'START'
  choices = []
- sent_word_cnt = 0
  while True:
  next_choice = rng.choice(chain[choice]['choices'], p=chain[choice]['probs'])
@@ -126,14 +128,13 @@ while word_cnt > 0:
  .replace(" '", "'")
  .replace("", "")
  .replace(" `", "`")
- + '.'
+ + '. '
  )
- word_cnt -= sent_word_cnt
  paragraph_sent_cnt -= 1
  if paragraph_sent_cnt < 0:
- sents.append(os.linesep)
- sents.append(os.linesep)
+ paragraphs.append(fill(''.join(sents), replace_whitespace=False, drop_whitespace=False))
+ sents = []
  paragraph_sent_cnt = rng.integers(5, 10)
  break
@@ -148,8 +149,9 @@ while word_cnt > 0:
  word = str.lower(word)
  choices.append(word)
- sent_word_cnt += 1
+ word_cnt -= 1
  choice = next_choice
- print(' '.join(sents))
+ print(f'{os.linesep}{os.linesep}'.join(paragraphs))

Loading…
Cancel
Save