import {
    anyCharIsAlphanumeric,
    isAlphaNumeric,
    strip,
} from '../../TextNormalization/TextNormalization'
import PlexusEntityRecognizer, {wordTag} from './PlexusEntityRecognizer'
import {checkEntityStatus} from './CheckEntityStatus'
import {timeFunction} from '../../../Testing/NotesTesting'
import GraphBuilderLite, {
    PlexusPagesMap,
} from '../../GraphBuilding/GraphBuilderLite/GraphBuilderLite'
import {PlexusParagraph} from '../PageInterfaces'

/**
 * A half-open span of word positions within a note.
 * The entity finders in this file set `endIndex` to one past the last word of
 * the phrase, so `endIndex - startIndex` is the number of words covered.
 */
export interface RangeInNote {
    //position of the first word in the span
    startIndex: number
    //position just after the last word in the span (exclusive)
    endIndex: number
}
/**
 * A phrase found in a paragraph, with optional metadata about where it sits
 * in the note and where else it occurs.
 */
export interface EnglishEntity {
    //the phrase text; the finders in this file build it with stringFromTagArr
    phrase: string
    //the word tags the phrase was assembled from
    tagArr?: wordTag[]
    //word span of the phrase within the note (end is exclusive)
    wordRange?: RangeInNote
    score?: number //between 0 & 1
    //paragraphs in which the phrase occurs
    whereItRepeats?: PlexusParagraph[]
    refPageId?: string //the page it references. must exist if where it repeats is defined. this should define whether the entity is considered repeating
}

/**
 * Address made of a document id plus a paragraph index and a word index.
 * NOTE(review): nothing in the visible part of this file uses this type — confirm it is still needed.
 */
export interface TextLocation {
    documentId: string
    paragraphIndex: number
    wordIndex: number
}

/**
 * Finds the entities in a paragraph and collapses every hit to one entity.
 * For each hit the page reference wins, then the repeating entity, then the
 * plain entity.
 * @param paragraph the paragraph to search
 * @param builder optional graph builder, forwarded to findEntities
 * @param pageMap optional map of pages, forwarded to findEntities
 * @returns one entity per hit, in the order findEntities reports them
 */
export const findEntitiesInParagraph = (
    paragraph: PlexusParagraph,
    builder?: GraphBuilderLite,
    pageMap?: PlexusPagesMap
): EnglishEntity[] => {
    //option 2 for location to add pages for meaningful repetition
    //this is a good place to prioritize repeating phrases and strike suggested ones that overlap with each other and with repeating phrases
    return findEntities(paragraph, builder, pageMap).map(
        hit => hit.largestPageReference || hit.largestRepeatingEntity || hit.largestEntity
    )
}

/**
 * Scans a paragraph's words from last to first and, for each end position,
 * looks for the longest page reference ending there; only when none is found
 * does it look for the longest repeating entity instead.
 * A hit is recorded only when one of those two exists. After a hit the scan
 * jumps past the words the hit covers, so recorded hits do not overlap.
 * Probably don't want to add pages for meaningful repetition WITHIN this function, for now, because would require deleting and updating a ton
 * NOTE: only page references and repeating entities are reported for now;
 * `largestEntity` is always recorded as undefined.
 * @param paragraph the paragraph whose text is tagged and searched
 * @param builder optional graph builder, forwarded to the phrase finders
 * @param pageMap optional map of pages, forwarded to the page-reference finder
 * @returns the hits in paragraph order (earliest phrase first)
 */
export const findEntities = (
    paragraph: PlexusParagraph,
    builder?: GraphBuilderLite,
    pageMap?: PlexusPagesMap
): {
    largestEntity: EnglishEntity
    largestRepeatingEntity: EnglishEntity
    largestPageReference: EnglishEntity
}[] => {
    //hits are collected back-to-front and flipped once at the end
    const found: {
        largestEntity: EnglishEntity
        largestRepeatingEntity: EnglishEntity
        largestPageReference: EnglishEntity
    }[] = []

    const tags = PlexusEntityRecognizer.bestPosTagger(paragraph.text)
    const decoratedWords = getDecoratedWordsWithIndices(paragraph.text)

    //reinsert internally punctuated words
    //layout: decorated word 0, pos 1, lemma 2, word as found by tagger 3, original word index 4
    const correctedTags = decoratedWords.map((decorated, position) => {
        const tag = tags[position]
        return [decorated.word, tag[1], tag[2], tag[0], decorated.index]
    })

    let tagIndex = correctedTags.length - 1
    while (tagIndex >= 0) {
        const endTagIndex = tagIndex
        const endIndexInNote: number = correctedTags[endTagIndex][4] as number

        //a page reference takes precedence over everything else
        const largestPageReference = timeFunction(
            () =>
                findLongestPageReference(
                    endTagIndex,
                    endIndexInNote,
                    correctedTags,
                    builder,
                    paragraph,
                    pageMap
                ),
            'find longest page reference: ' + paragraph.text.slice(0, 20)
        )

        //only fall back to repeating entities when there is no page reference
        let largestRepeatingEntity
        if (!largestPageReference) {
            const repeating = timeFunction(
                () =>
                    findLongestRepeatingEntity(
                        endTagIndex,
                        endIndexInNote,
                        correctedTags,
                        builder,
                        paragraph
                    ),
                'find longest repeatig entity: "' + paragraph.text.slice(0, 20) + endTagIndex + '"'
            )
            largestRepeatingEntity = repeating.largestRepeatingEntity
        }

        //ONLY FIND REPEATING ENTITIES FOR NOW
        const priorityEntity: EnglishEntity = largestPageReference || largestRepeatingEntity
        if (priorityEntity) {
            found.push({
                largestEntity: undefined,
                largestRepeatingEntity,
                largestPageReference,
            })

            //skip the remaining words of this entity so that hits don't overlap
            tagIndex -= entityWordCount(priorityEntity) - 1
        }
        tagIndex--
    }

    return found.reverse()
}

/** Number of words an entity covers, taken from its half-open word range. */
const entityWordCount = (entity: EnglishEntity) => {
    const {startIndex, endIndex} = entity.wordRange
    return endIndex - startIndex
}

/**
 * Runs findEntities over a bare string by wrapping it in a placeholder
 * paragraph (paragraphId and pageId both '1'), without a graph builder.
 * To be used for testing and notes-importing.
 * NOTE(review): findEntities currently records `largestEntity` as undefined for
 * every hit, so the values mapped out here are undefined — confirm this is intended.
 * @param note the given string, to split up and find entities in
 * @returns the `largestEntity` of each hit reported by findEntities
 */
export const findEnglishEntities = (note: string): EnglishEntity[] => {
    const hits = findEntities({
        text: note,
        cleanWords: GraphBuilderLite.cleanPhrase(note),
        paragraphId: '1',
        pageId: '1',
    })
    return hits.map(({largestEntity}) => largestEntity)
}
/**
 * NOT USED!
 * Runs findEntities over a bare string (wrapped in a placeholder paragraph with
 * paragraphId and pageId both '1') and keeps only the repeating entities.
 * To be used for testing and notes-importing.
 * @param note the given string, to split up and find entities in
 * @param builder graph builder forwarded to findEntities
 * @returns the `largestRepeatingEntity` of every hit that has one
 */
export const findRepeatingEnglishEntities = (
    note: string,
    builder: GraphBuilderLite
): EnglishEntity[] => {
    const hits = findEntities(
        {
            text: note,
            cleanWords: GraphBuilderLite.cleanPhrase(note),
            paragraphId: '1',
            pageId: '1',
        },
        builder
    )
    //hits without a repeating entity (e.g. page references) are dropped
    return hits.map(hit => hit.largestRepeatingEntity).filter(entity => Boolean(entity))
}

/**
 * Verdict on whether a candidate phrase is an entity.
 * Member order matters: the finders in this file compare statuses with `>=`
 * (e.g. `status >= ENGLISH_ENTITY_STATUS.MAYBE`), relying on the implicit
 * numeric values NO = 0 < MAYBE = 1 < YES = 2 < YES_REPEATS = 3.
 */
export enum ENGLISH_ENTITY_STATUS {
    NO, //ends the search loops in this file
    MAYBE, //the search keeps extending the phrase, but nothing is recorded
    YES, //the phrase is recorded as an entity
    YES_REPEATS, //NOTE(review): not assigned anywhere in the visible code; presumably set elsewhere — confirm
}

/**
 * Splits text on single spaces and keeps only the tokens for which
 * `anyCharIsAlphanumeric(token, false)` is true; kept tokens retain whatever
 * punctuation is attached to them.
 * Empty or missing text yields an empty list, matching getRawWords.
 * @param text the text to split
 * @returns the kept tokens, in their original order
 */
export function getDecoratedWords(text: string): string[] {
    //guard falsy text the same way getRawWords does, instead of throwing on undefined
    if (!text) return []
    return text.split(' ').filter(word => anyCharIsAlphanumeric(word, false))
}

/**
 * Splits text on single spaces without any filtering, so consecutive spaces
 * produce empty tokens.
 * @param text the text to split; falsy text yields an empty list
 * @returns every space-separated token, in order
 */
export function getRawWords(text: string): string[] {
    if (!text) {
        return []
    }
    return text.split(' ')
}
/**
 * Like getDecoratedWords, but each kept token carries its position in the
 * unfiltered getRawWords list, so dropped tokens still count towards the index.
 * @param text the text to split
 * @returns the kept tokens with their raw-word indices, in order
 */
export function getDecoratedWordsWithIndices(text: string): {word: string; index: number}[] {
    const rawWords = getRawWords(text)
    const kept: {word: string; index: number}[] = []
    for (let index = 0; index < rawWords.length; index++) {
        const word = rawWords[index]
        if (anyCharIsAlphanumeric(word, false)) {
            kept.push({word, index})
        }
    }
    return kept
}

/**
 * Finds the longest english-entity phrase ending at the given word.
 * Thin wrapper over findLongestRepeatingEntity that returns only its
 * `largestEntity`; no paragraph is passed along.
 * @param endTagIndex index into `note` of the last word of the phrase
 * @param endWordIndex index of that same word in the original note
 * @param note the tagged words of the note
 * @param builder optional graph builder, forwarded as-is
 * @returns the longest entity found, or undefined when there is none
 */
export const findLongestEntityPhrase = (
    endTagIndex: number,
    endWordIndex: number,
    note: wordTag[],
    builder?: GraphBuilderLite
): EnglishEntity => {
    const {largestEntity} = findLongestRepeatingEntity(
        endTagIndex,
        endWordIndex,
        note,
        builder
    )
    return largestEntity
}

/**
 * Looks for the longest phrase ending at `endTagIndex` that matches a page title.
 * Starting with the single end word, the phrase is extended one word to the
 * left per step for as long as `builder.findTitlesWhereOccurs` still reports a
 * title (outside the paragraph's own page) containing it. A step yields a
 * reference when `builder.filterForPerfect` keeps at least one of those titles
 * and, if `pagesMap` is given, that title's page id is a key of the map.
 * Later (longer) matches overwrite earlier ones.
 * @param endTagIndex index into `note` of the last word of the phrase
 * @param endWordIndex actual word index in note
 * @param note the tagged words of the note
 * @param builder graph builder used for title lookups; when missing, no reference can be found
 * @param paragraph the paragraph being searched; titles on its page are ignored
 * @param pagesMap optional map of existing pages used to filter matching titles
 * @returns the longest page reference found, or undefined when there is none
 */
const findLongestPageReference = (
    endTagIndex: number,
    endWordIndex: number, //actual word index in note
    note: wordTag[],
    builder: GraphBuilderLite,
    paragraph?: PlexusParagraph,
    pagesMap?: PlexusPagesMap
): EnglishEntity => {
    //findEntities accepts an optional builder and forwards it here;
    //without one there are no titles to match, so bail out instead of throwing.
    if (!builder) return undefined

    let status: ENGLISH_ENTITY_STATUS = ENGLISH_ENTITY_STATUS.MAYBE
    let largestPageReference: EnglishEntity //the temp longest entity found so far
    let startTagIndex = endTagIndex
    //While the previous considered phrase is at least an Entity candidate:
    while (
        startTagIndex >= 0 &&
        status >= ENGLISH_ENTITY_STATUS.MAYBE //handled internally too
    ) {
        const candidateTags: wordTag[] = note.slice(startTagIndex, endTagIndex + 1)
        const candidatePhrase: string = stringFromTagArr(candidateTags)

        //all paragraphs that include this title that aren't this page.
        //paragraph is optional: when it is absent no title is excluded.
        const inclusiveTitles = builder
            .findTitlesWhereOccurs(candidatePhrase)
            .filter(para => !paragraph || para.pageId !== paragraph.pageId)

        //If not inclusive titles, this phrase doesn't exist.
        if (!(inclusiveTitles.length > 0)) status = ENGLISH_ENTITY_STATUS.NO
        else {
            //check if any are perfect
            const perfectTitles = builder.filterForPerfect(candidatePhrase, inclusiveTitles)
            const perfectExistingTitles = pagesMap
                ? perfectTitles.filter(title => title.pageId in pagesMap)
                : perfectTitles
            if (!perfectExistingTitles || !(perfectExistingTitles.length > 0))
                status = ENGLISH_ENTITY_STATUS.MAYBE
            else {
                //Otherwise, there is a perfect title that corresponds.
                status = ENGLISH_ENTITY_STATUS.YES
                largestPageReference = {
                    phrase: candidatePhrase,
                    tagArr: candidateTags,
                    wordRange: {
                        startIndex: endWordIndex + 1 - candidateTags.length,
                        endIndex: endWordIndex + 1, //after the last word in this phrase
                    },
                    refPageId: perfectExistingTitles[0].pageId,
                    whereItRepeats: perfectExistingTitles,
                }
                //only the first title is used as the reference, so more than one is unexpected
                console.assert(perfectExistingTitles.length === 1)
            }
        }
        startTagIndex--
    }
    return largestPageReference
}

/**
 * Finds the longest entity phrase ending at `endTagIndex`, and the longest such
 * phrase that also occurs somewhere other than the given paragraph.
 * Starting with the single end word, the phrase is extended one word to the
 * left per step for as long as checkEntityStatus rates it at least MAYBE.
 * Steps rated at least YES are recorded; later (longer) ones overwrite earlier ones.
 * @param endTagIndex index into `note` of the last word of the phrase
 * @param endWordIndex actual word index in note
 * @param note the tagged words of the note
 * @param builder optional graph builder used to look up where a phrase occurs;
 *     without it no repeating entity is reported
 * @param paragraph optional paragraph being searched; occurrences in it do not
 *     count as repetition. When omitted, every occurrence counts.
 * @returns the longest entity and longest repeating entity found (either may be
 *     undefined); `largestPageReference` is always undefined here
 */
const findLongestRepeatingEntity = (
    endTagIndex: number,
    endWordIndex: number, //actual word index in note
    note: wordTag[],
    builder?: GraphBuilderLite,
    paragraph?: PlexusParagraph
): {
    largestEntity: EnglishEntity
    largestRepeatingEntity: EnglishEntity
    largestPageReference: EnglishEntity
} => {
    //stores the status of the temp candidate phrase (ie temp word + subseq_phrase)
    //initialized to maybe, because whatever word is passed to this function is, by presumption, possibly an entity.
    let status: ENGLISH_ENTITY_STATUS = ENGLISH_ENTITY_STATUS.MAYBE
    let largestEntity: EnglishEntity //the temp longest entity found so far
    let largestRepeatingEntity: EnglishEntity //the temp longest repeating entity found so far
    let lastCandidateScore = 0
    let subsequentCandidatePhrase: wordTag[] = []
    //index of the word currently being prepended to the phrase
    let startTagIndex = endTagIndex

    //While the previous considered phrase is at least an Entity candidate:
    while (
        startTagIndex >= 0 &&
        status >= ENGLISH_ENTITY_STATUS.MAYBE //handled internally too
    ) {
        // Check status for this entity
        let tempScore: number

        //if previous entity and starts with punctuation, call it quits
        //For the parenthesis use case (which gavin picked up on)
        if (
            subsequentCandidatePhrase.length > 0 &&
            !isAlphaNumeric(subsequentCandidatePhrase[0][0][0])
        ) {
            status = ENGLISH_ENTITY_STATUS.NO
            tempScore = -1
        } else {
            ;({status, candidateScore: tempScore} = checkEntityStatus(
                note[startTagIndex],
                subsequentCandidatePhrase,
                lastCandidateScore,
                note
            ))
        }

        //Record this entity, if found
        if (status >= ENGLISH_ENTITY_STATUS.MAYBE) {
            //store the full phrase under consideration
            const fullPhrase = [note[startTagIndex], ...subsequentCandidatePhrase]

            //record this as the largest entity found with the given endpoint
            if (status >= ENGLISH_ENTITY_STATUS.YES) {
                //computed once and shared by the entity, the lookup and the timing label
                const phraseString = stringFromTagArr(fullPhrase)
                largestEntity = {
                    phrase: phraseString,
                    tagArr: fullPhrase,
                    wordRange: {
                        //end word index + 1 is word after the one being considered
                        startIndex: endWordIndex + 1 - fullPhrase.length,
                        endIndex: endWordIndex + 1, //after the last word in this phrase
                    },
                    score: tempScore,
                    whereItRepeats: undefined,
                }
                const placesItRepeats = timeFunction(
                    () => (builder ? builder.findWhereOccurs(phraseString) : undefined),
                    'places it repeats: ' + phraseString
                )

                //paragraph is optional (findLongestEntityPhrase omits it); with no
                //paragraph there is no "here" to exclude, so every occurrence counts.
                const placesItRepeatsMinusHere = placesItRepeats
                    ? placesItRepeats.filter(
                          paragraphWhereOccurs =>
                              !paragraph || !sameParagraph(paragraphWhereOccurs, paragraph)
                      )
                    : []
                if (placesItRepeatsMinusHere.length > 0) {
                    largestRepeatingEntity = {
                        ...largestEntity,
                        whereItRepeats: placesItRepeats, //include here in lookup
                    }
                }
            }

            //given that this is a candidate, shift index back by one for next run
            startTagIndex--
            subsequentCandidatePhrase = fullPhrase
            lastCandidateScore = tempScore
        }
    }
    return {largestEntity, largestRepeatingEntity, largestPageReference: undefined}
}

/** True when both paragraphs have the same paragraphIndex and the same pageId. */
const sameParagraph = (a: PlexusParagraph, b: PlexusParagraph) => {
    if (a.paragraphIndex !== b.paragraphIndex) {
        return false
    }
    return a.pageId === b.pageId
}

/**
 * Builds a phrase string from word tags: element 0 of every tag is joined with
 * single spaces, leading whitespace is trimmed, and the result is passed
 * through `strip(text, true, true, false)`, whose `strippedText` is returned.
 * NOTE(review): presumably that strip call removes beginning and ending
 * punctuation — confirm against TextNormalization.
 * @param tagArr the word tags making up the phrase
 * @returns the stripped phrase text
 */
export const stringFromTagArr = (tagArr: wordTag[]): string => {
    let spaced = ''
    tagArr.forEach(tag => {
        spaced = spaced + ' ' + tag[0]
    })
    return strip(spaced.trimStart(), true, true, false).strippedText
}
