'''
markov.py - Generate a novel using Markov chains

Copyright (C) 2020  Blink The Things

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
'''

import argparse
import os

import numpy as np

# Seed used when the user does not pass --seed.
DEFAULT_SEED = 12345

# Number of sentences printed per run.
SENTENCE_COUNT = 10

# Coarse part-of-speech labels whose tokens are left out of the model.
SKIPPED_POS = ('SPACE', 'PUNCT', 'X')


def parse_args(argv=None):
    '''Parse the command line.

    argv: list of argument strings; None means use sys.argv[1:].

    Returns the argparse namespace with ``input`` (list of file paths) and
    ``seed`` (int, or None when not given).
    '''
    parser = argparse.ArgumentParser(description='Generate a novel using Markov chains.')
    parser.add_argument('input', nargs='+', help='used to construct Markov transition matrix')
    parser.add_argument('-s', '--seed', type=int, help='seed for random number generator')
    return parser.parse_args(argv)


def make_rng(seed=None):
    '''Return a NumPy random generator seeded with *seed*.

    Only a missing seed (None) falls back to DEFAULT_SEED; an explicit 0 is
    a legitimate seed and is used as given.
    '''
    return np.random.default_rng(DEFAULT_SEED if seed is None else seed)


def collect_sentence(sent, words, edges):
    '''Record one sentence's vocabulary and state transitions.

    sent:  iterable of tokens exposing ``pos_``, ``tag_``, ``dep_`` and
           ``text`` attributes.
    words: dict mapping a state name to the list of words seen in that
           state; updated in place. Each word is filed under both
           "tag,dep" and the positional state "tag,dep,N".
    edges: list of (from_state, to_state) pairs; appended to in place. The
           walk runs START -> "tag,dep,1" -> "tag,dep,2" -> ... -> STOP.

    Tokens whose ``pos_`` is in SKIPPED_POS are ignored and do not advance
    the position counter. A sentence with no remaining tokens contributes a
    single START -> STOP edge.
    '''
    curr_state = 'START'
    position = 0
    for token in sent:
        if token.pos_ in SKIPPED_POS:
            continue
        position += 1
        base_state = f'{token.tag_},{token.dep_}'
        next_state = f'{base_state},{position}'
        words.setdefault(base_state, []).append(token.text)
        words.setdefault(next_state, []).append(token.text)
        edges.append((curr_state, next_state))
        curr_state = next_state
    edges.append((curr_state, 'STOP'))


def build_chain(edges):
    '''Turn observed transitions into per-state choice/probability tables.

    edges: iterable of (from_state, to_state) pairs.

    Returns a dict mapping each from_state to
    ``{'choices': [to_state, ...], 'probs': [float, ...]}`` where the
    probabilities are relative frequencies and choices appear in the order
    they were first observed.
    '''
    transitions = {}
    for source, target in edges:
        outgoing = transitions.setdefault(source, {})
        outgoing[target] = outgoing.get(target, 0) + 1

    chain = {}
    for source, outgoing in transitions.items():
        total = sum(outgoing.values())
        chain[source] = {
            'choices': list(outgoing),
            'probs': [count / total for count in outgoing.values()],
        }
    return chain


def generate_sentence(chain, words, rng):
    '''Random-walk the chain from START to STOP and return the sentence.

    chain: table produced by build_chain().
    words: state -> word list mapping filled by collect_sentence().
    rng:   NumPy random generator used for every draw.

    Returns the chosen words joined by single spaces.

    Raises ValueError when START is missing or can only lead straight to
    STOP; in that case no non-empty sentence exists and the rejection loop
    below would never finish.
    '''
    start = chain.get('START')
    if start is None or all(target == 'STOP' for target in start['choices']):
        raise ValueError('input contains no usable sentences')

    state = 'START'
    picked = []
    while True:
        next_state = rng.choice(chain[state]['choices'], p=chain[state]['probs'])
        if next_state == 'STOP':
            if state == 'START':
                # An empty sentence was drawn; reject it and draw again.
                continue
            return ' '.join(picked)
        try:
            candidates = words[next_state]
        except KeyError:
            # Fall back to the position-independent "tag,dep" bucket.
            candidates = words[','.join(next_state.split(',')[:-1])]
        picked.append(rng.choice(candidates))
        state = next_state


def main(argv=None):
    '''Build the model from every input file and print generated sentences.'''
    args = parse_args(argv)

    # Imported here so the helpers above can be imported and tested without
    # spaCy or its language model being installed.
    import spacy

    nlp = spacy.load('en_core_web_sm')
    rng = make_rng(args.seed)

    words = {}
    edges = []
    # Every input file contributes to the model, not just the last one.
    for infile in args.input:
        with open(infile, mode='r') as f:
            doc = nlp(f.read())
        for sent in doc.sents:
            collect_sentence(sent, words, edges)

    chain = build_chain(edges)
    try:
        sents = [generate_sentence(chain, words, rng) for _ in range(SENTENCE_COUNT)]
    except ValueError as err:
        raise SystemExit(f'error: {err}') from err

    # print() writes to a text-mode stream that already translates '\n' to
    # the platform line ending, so os.linesep must not be used here.
    print('\n'.join(sents))


if __name__ == '__main__':
    main()