import winkTokenizer from 'wink-tokenizer'
import winkPosTagger from 'wink-pos-tagger'
import lemmatize from 'wink-lemmatizer'
import stem from 'wink-porter2-stemmer'
import {englishRarityScore} from '../../TextFiltering/nodeFilter'
import {
    anyCharIsAlphanumeric,
    isAlphaNumeric,
    splitNote,
} from '../../TextNormalization/TextNormalization'
import pos from 'pos'
import {Tag} from 'en-pos'
import {preprocessForPosTagging} from '../PreprocessForPosTagger'
import {combineParallelArrays} from '../../../Miscellaneous/Miscellaneous'

//This file contains a lot of building blocks for the English Entity Detection Evaluator,
//including a few part-of-speech libraries no longer used.

/**
 * Part-of-speech tag strings that this module treats as entity candidates.
 * The values are compared against the `pos` field of the tags produced by the
 * wink tagger (see `winkFilteredTags` and `entityScore`).
 */
export enum WinkEntityType {
    //Tag string used for proper nouns
    PROPER_NOUN = 'NNP',
    //Tag string used for common nouns; also used as a prefix test in `entityScore`,
    //so any tag beginning with 'NN' counts as a noun there
    NORMAL_NOUN = 'NN',
}

/**
 * Shape of a single tagged token. Mirrors the inline return type declared on
 * `PlexusEntityRecognizer.winkSentenceTags`.
 */
export interface WinkTag {
    //The token text
    value: string
    //Lemma of the token; used as the lookup key for the English rarity score
    lemma: string
    //Part-of-speech tag, e.g. 'NNP' or 'NN' (see WinkEntityType)
    pos: string
    //NOTE(review): presumably the tokenizer's token category (word, punctuation, ...) - confirm against wink docs
    tag: string
}

//A word tag represented as a flat array of its elements (a word's value, pos, lemma, score, etc.).
//For example, `bestPosTagger` emits one `[value, pos, lemma]` array per token.
export type wordTag = (string | number)[]

/**
 * Contains building blocks for entity evaluation.
 * In particular, part of speech tagging (wink, plus two alternative taggers kept
 * for reference) and english rarity scores.
 *
 * Every member is static; the class is only used as a namespace.
 */
class PlexusEntityRecognizer {
    constructor() {}

    //Shared wink instances, created on first use, so that scoring a single word
    //does not build a fresh tokenizer and tagger on every call.
    private static sharedTokenizer: ReturnType<typeof winkTokenizer> | undefined
    private static sharedTagger: ReturnType<typeof winkPosTagger> | undefined

    //Returns the shared wink tokenizer, creating it on first use.
    private static getTokenizer() {
        if (!PlexusEntityRecognizer.sharedTokenizer) {
            PlexusEntityRecognizer.sharedTokenizer = winkTokenizer()
        }
        return PlexusEntityRecognizer.sharedTokenizer
    }

    //Returns the shared wink part-of-speech tagger, creating it on first use.
    private static getTagger() {
        if (!PlexusEntityRecognizer.sharedTagger) {
            PlexusEntityRecognizer.sharedTagger = winkPosTagger()
        }
        return PlexusEntityRecognizer.sharedTagger
    }

    //Rarity of `lemma` (or of `word` when no lemma is available).
    //A NaN score from englishRarityScore is mapped to 10.
    private static rarityFor(word: string, lemma?: string): number {
        const rarity = englishRarityScore(lemma ? lemma : word)
        return isNaN(rarity) ? 10 : rarity
    }

    /**
     * Tags a list of already-split words with the wink tagger's `tagRawTokens`.
     * @param words raw word strings
     * @returns whatever `tagRawTokens` returns for those words
     */
    static tagWords(words: string[]): any[] {
        return PlexusEntityRecognizer.getTagger().tagRawTokens(words)
    }

    /**
     * Tokenizes a sentence with wink-tokenizer and tags the tokens with wink-pos-tagger.
     * @param sentence text to tag
     * @returns the tagged tokens; an empty array when `sentence` is empty or missing
     */
    static winkSentenceTags(
        sentence: string
    ): {value: string; lemma: string; pos: string; tag: string}[] {
        //Checked before any wink instance is touched, so empty input stays cheap
        if (!sentence) return []
        const tokens = PlexusEntityRecognizer.getTokenizer().tokenize(sentence)
        return PlexusEntityRecognizer.getTagger().tag(tokens)
    }

    //Filters tags; result set only includes entity types
    //(tags whose pos is exactly one of the WinkEntityType values)
    static winkFilteredTags(sentence: string): {value: string; lemma: string; pos: string}[] {
        const entityTypes: string[] = Object.values(WinkEntityType)
        return PlexusEntityRecognizer.winkSentenceTags(sentence).filter(tag =>
            entityTypes.includes(tag.pos)
        )
    }

    //Tag of the first token of `word`; undefined when tagging yields no tokens (e.g. empty input)
    static winkWordTag(word: string) {
        return PlexusEntityRecognizer.winkSentenceTags(word)[0]
    }
    //Lemma of `word` according to wink-lemmatizer
    static winkLemma(word: string) {
        return lemmatize(word)
    }
    //Stem of `word` according to wink-porter2-stemmer
    static winkStem(word: string) {
        return stem(word)
    }

    //Gets the part of speech of a given word.
    //Logs an error and returns undefined when the word could not be tagged.
    static winkWordPos(word: string) {
        const tag = PlexusEntityRecognizer.winkWordTag(word)
        if (!tag || !tag.pos) {
            console.error(word + " didn't get a tag.")
            return
        }
        return tag.pos
    }

    /**
     * English rarity score of the word's lemma, falling back to the word itself
     * when the word cannot be tagged or has no lemma.
     * @param word word to score
     * @returns the rarity score, or 10 when englishRarityScore yields NaN
     */
    static lemmaEnglishRarity = (word: string) => {
        const tag = PlexusEntityRecognizer.winkWordTag(word)
        return PlexusEntityRecognizer.rarityFor(word, tag ? tag.lemma : undefined)
    }

    //Sum of the lemma rarity and the simple part-of-speech score of `word`
    static entityScoreSimple = (word: string) => {
        return (
            PlexusEntityRecognizer.lemmaEnglishRarity(word) +
            PlexusEntityRecognizer.posScoreSimple(word)
        )
    }

    /**
     * Coarse part-of-speech score: 10 for tags starting with 'NNP', 5 for other
     * tags starting with 'NN', 0 otherwise (including when no tag is available).
     * @param word word to tag when `givenPos` is not supplied
     * @param givenPos part-of-speech tag to use instead of tagging `word`
     */
    static posScoreSimple = (word: string, givenPos?: string) => {
        const pos = givenPos ? givenPos : PlexusEntityRecognizer.winkWordPos(word)
        //An untaggable word has no part of speech and therefore earns no bonus
        if (!pos) return 0
        if (pos.startsWith('NNP')) return 10
        if (pos.startsWith('NN')) return 5
        return 0
    }

    /**
     * Tags a sentence after running it through `preprocessForPosTagging`.
     * @param sentence raw sentence text
     * @returns one `[value, pos, lemma]` array per token, keeping only tokens for
     *          which `anyCharIsAlphanumeric(value, false)` is true
     */
    static bestPosTagger = (sentence: string): wordTag[] => {
        //remove contractions
        const cleaned = preprocessForPosTagging(sentence)
        return PlexusEntityRecognizer.winkSentenceTags(cleaned)
            .map(e => [e.value, e.pos, e.lemma])
            .filter(tag => anyCharIsAlphanumeric(tag[0], false))
    }

    /**
     * Entity score of a word: the average of its rarity and a part-of-speech score.
     * @param word word to score
     * @param givenPos part-of-speech tag to use instead of the one wink assigns
     * @param bump amount added to the rarity before weighing
     * @param beginningOfSentence when true, a proper-noun tag does not earn the top
     *        part-of-speech score and is scored as an ordinary noun instead
     * @returns 10 when the bumped rarity is exactly 10, otherwise
     *          `0.5 * rarity + 0.5 * posScore`
     */
    static entityScore = (
        word: string,
        givenPos?: string,
        bump?: number,
        beginningOfSentence?: boolean
    ) => {
        //Tag once and reuse the result for both the lemma and the part of speech
        const tag = PlexusEntityRecognizer.winkWordTag(word)
        const lemma = tag ? tag.lemma : undefined
        const rarity = PlexusEntityRecognizer.rarityFor(word, lemma) + (bump ? bump : 0)
        //A rarity of exactly 10 bypasses the part-of-speech weighing
        if (rarity === 10) return rarity

        const pos = givenPos ? givenPos : tag ? tag.pos : undefined
        //Lowest tier is the default; it also covers words with no part of speech
        let posScore = 2
        if (pos === WinkEntityType.PROPER_NOUN && !beginningOfSentence) {
            posScore = 10
        } else if (pos && pos.startsWith(WinkEntityType.NORMAL_NOUN)) {
            posScore = 6
        }

        //weighing part of speech and rarity equally
        return 0.5 * rarity + 0.5 * posScore
    }

    /**
     * Gets whether a word represents an entity
     * @param word stripped of punctuation on outside, but caps should be kept. informs wink pos
     * @param beginningOfSentence forwarded to `entityScore`
     * @param givenPos forwarded to `entityScore`
     * @returns false for empty, non-alphanumeric or numeric words; otherwise whether
     *          the entity score exceeds the threshold
     */
    static wordIsEntity(word: string, beginningOfSentence?: boolean, givenPos?: string) {
        if (!word) return false
        //NOTE(review): a string pattern makes replace() strip only the FIRST '-', so
        //words with two or more hyphens still reach isAlphaNumeric with a hyphen in
        //them - confirm that is intended. (An earlier comment here mentioned
        //apostrophe-s, which this replace does not touch.)
        if (!isAlphaNumeric(word.replace('-', ''))) return false
        //Purely numeric words are never entities
        if (!isNaN(Number(word))) return false

        const score = PlexusEntityRecognizer.entityScore(word, givenPos, 0, beginningOfSentence)

        //Empirically established threshold
        return score > 6.5
    }

    //Other part of speech tagger alternatives to WINK:

    //Lexes and tags a sentence with the `pos` library
    static fastTag = (sentence: string) => {
        const words = new pos.Lexer().lex(sentence)
        return new pos.Tagger().tag(words)
    }

    //Tags a sentence with `en-pos`, optionally applying its smoothing pass, and
    //combines the resulting tokens and tags via combineParallelArrays
    static enTag = (sentence: string, smoothing: boolean = false) => {
        const initial = new Tag(splitNote(sentence)).initial()
        const result = smoothing ? initial.smooth() : initial
        return combineParallelArrays(result.tokens, result.tags)
    }
}

export default PlexusEntityRecognizer
