'''
    markov.py - Generate a novel using Markov chains
    Copyright (C) 2020  Blink The Things

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
'''

import argparse
import numpy as np
import os
import spacy

# Command-line interface: one or more input files plus an optional RNG seed.
parser = argparse.ArgumentParser(description='Generate a novel using Markov chains.')
parser.add_argument('input', nargs='+', help='used to construct Markov transition matrix')
parser.add_argument('-s', '--seed', type=int, help='seed for random number generator')
args = parser.parse_args()

# spaCy pipeline applied to each input file's text.
nlp = spacy.load('en_core_web_sm')

# Use the fixed seed 12345 only when no seed was given on the command line.
# The test is against None rather than truthiness: `args.seed or 12345`
# would silently replace an explicit `--seed 0` with 12345.
rng = np.random.default_rng(12345 if args.seed is None else args.seed)

# Vocabulary: state key -> list of every word observed in that state
# (duplicates kept, so sampling from a list is frequency-weighted).
# Each kept token is filed under two keys: 'TAG,DEP' and the
# position-specific 'TAG,DEP,N', where N is the token's 1-based position
# in its sentence counting only kept tokens.
words = {}
# Every observed (from_state, to_state) pair; each sentence's walk begins
# at the 'START' sentinel and ends at the 'STOP' sentinel.
edges = []

# Coarse part-of-speech labels whose tokens are left out of the model.
SKIPPED_POS = ('SPACE', 'PUNCT', 'X')

input_text = ''
for infile in args.input:
    with open(infile, mode='r') as f:
        input_text = f.read()

    doc = nlp(input_text)

    for sentence in doc.sents:
        prev_state = 'START'
        position = 0

        for tok in sentence:
            if tok.pos_ in SKIPPED_POS:
                continue

            position += 1
            base_state = f'{tok.tag_},{tok.dep_}'
            numbered_state = f'{base_state},{position}'

            # Record the surface form under both the generic and the
            # position-specific key.
            words.setdefault(base_state, []).append(tok.text)
            words.setdefault(numbered_state, []).append(tok.text)

            # Transitions are tracked between position-specific states only.
            edges.append((prev_state, numbered_state))
            prev_state = numbered_state

        # A sentence with no kept tokens contributes ('START', 'STOP').
        edges.append((prev_state, 'STOP'))

# Tally outgoing transitions per source state:
#   transitions[src] = {'cnt': total outgoing edges, 'to': {dst: count}}
transitions = {}
for src, dst in edges:
    record = transitions.setdefault(src, {'cnt': 0, 'to': {}})
    record['cnt'] += 1
    record['to'][dst] = record['to'].get(dst, 0) + 1

# Turn the raw counts into a sampling table: for each source state, the
# candidate next states and their relative frequencies, kept as two
# parallel lists in the same order.
chain = {
    src: {
        'choices': list(record['to']),
        'probs': [count / record['cnt'] for count in record['to'].values()],
    }
    for src, record in transitions.items()
}

# Generation needs at least one transition out of 'START' that leads to a
# real state.  Without this check an empty corpus raises KeyError on
# chain['START'], and a corpus whose only transition from 'START' is 'STOP'
# makes the rejection loop below spin forever.
start_entry = chain.get('START')
if start_entry is None or all(target == 'STOP' for target in start_entry['choices']):
    raise SystemExit('no usable sentences found in input; nothing to generate')

# Generate ten sentences by walking the chain from 'START' to 'STOP'.
sents = []
for _ in range(10):
    state = 'START'

    sentence_words = []
    while True:
        next_state = rng.choice(chain[state]['choices'], p=chain[state]['probs'])

        # Reject an empty sentence ('START' straight to 'STOP') and redraw.
        if state == 'START' and next_state == 'STOP':
            continue

        if next_state == 'STOP':
            sents.append(' '.join(sentence_words))
            break

        try:
            word = rng.choice(words[next_state])
        except KeyError:
            # No words stored under the position-specific key: drop the
            # trailing position field and use the generic 'TAG,DEP' pool.
            word = rng.choice(words[','.join(next_state.split(',')[:-1])])

        sentence_words.append(word)

        state = next_state

# Join with '\n' rather than os.linesep: print() writes to a text-mode
# stream that already translates '\n' to the platform line ending, so
# os.linesep would produce '\r\r\n' on Windows.
print('\n'.join(sents))