import {
    charIsAlphaNumeric,
    isAlphaNumeric,
    strip,
} from '../../TextNormalization/TextNormalization'
import PlexusEntityRecognizer, {wordTag} from '../EnglishEntitiesHelpers/PlexusEntityRecognizer'

/**
 * Evaluates a single tagged word as a member of an english entity phrase and
 * reports both the verdict and the combined entity score.
 *
 * The word is an entity member only when it passes `isEntityWord` (called with
 * prepositions disallowed) AND its combined score meets the threshold applied
 * by `recursiveEntityEval`. Both checks are always run.
 *
 * @param word the tagged word under evaluation
 * @param subsPhrase the subsequent phrase (presumably the words following `word` — confirm with callers);
 *  when it is missing or empty, `word` is treated as the "first word" by `isEntityWord`
 * @param subsPhraseScore score of the subsequent phrase, forwarded to `recursiveEntityEval`
 * @returns `isEntity` — the combined verdict; `score` — the score reported by `recursiveEntityEval`
 */
export const englishEntityEval = (
    word: wordTag,
    subsPhrase: wordTag[],
    subsPhraseScore: number
): {isEntity: boolean; score?: number} => {
    // With no subsequent phrase, this word is handled as the "first word".
    const hasNoSubsequentWords = !subsPhrase || subsPhrase.length === 0
    const passesWordCheck = isEntityWord(word, false, hasNoSubsequentWords)
    // Check whether the combined score meets the rarity threshold
    const evaluation = recursiveEntityEval(word, subsPhraseScore)
    return {
        isEntity: passesWordCheck && evaluation.meetsThreshold,
        score: evaluation.score,
    }
}

/**
 * Determine whether at least one word in this phrase can serve as the core of an entity,
 * i.e. `wordIsQualified` accepts it against `EntityPartsOfSpeech`
 * (its part of speech is accepted, or its normalized entity score is >= 0.99).
 * @param phrase the tagged words to inspect
 * @returns true if any word qualifies; false otherwise (including for an empty phrase)
 */
const containsEntityCore = (phrase: wordTag[]): boolean =>
    // `some` stops at the first qualifying word; a `return` inside `forEach` only exits
    // the callback, so the previous loop always visited every word.
    phrase.some((word: wordTag) => wordIsQualified(word, EntityPartsOfSpeech))

/**
 * Scores a word together with the subsequent phrase and reports whether the combined
 * score clears the entity threshold. Responsible for housing the default threshold.
 * @param word the tagged word to score
 * @param subsequentPhraseScore 0-1
 * @param threshold value the combined score must strictly exceed; defaults to 0.6.
 *  Callers may override it to make the threshold context-based.
 * @returns `meetsThreshold` — whether the score is strictly greater than `threshold`;
 *  `score` — the combined score computed by `findEntityScore`
 */
export const recursiveEntityEval = (
    word: wordTag,
    subsequentPhraseScore: number,
    threshold: number = 0.6
): {meetsThreshold: boolean; score?: number} => {
    const score = findEntityScore(word, subsequentPhraseScore)
    const meetsThreshold = score > threshold
    return {meetsThreshold, score}
}

/**
 * Produces the normalized entity score for a word combined with its subsequent phrase.
 *  1. Looks up the word's own normalized score.
 *  2. Merges it with the subsequent-phrase score as x + y - xy.
 * Only the upper bound of the inputs is checked: if either exceeds 1, a `console.assert`
 * failure is logged and the computation still proceeds.
 * @param word the word to find the score for
 * @param subsequentPhraseScore between 0 and 1
 * @returns the combined score
 */
const findEntityScore = (word: wordTag, subsequentPhraseScore: number): number => {
    const wordScore = findEntityScoreForWord(word)
    const bothWithinUpperBound = wordScore <= 1 && subsequentPhraseScore <= 1
    console.assert(
        bothWithinUpperBound,
        `Either word score or subs phrase score is greater than 1: ${wordScore}, ${subsequentPhraseScore}`
    )
    // x + y - xy
    const overlap = wordScore * subsequentPhraseScore
    return wordScore + subsequentPhraseScore - overlap
}

/**
 * Finds a normalized entity score for a given word by dividing the raw
 * `PlexusEntityRecognizer.entityScore` result by 10.
 * @param word tagged word; element 0 is passed as the word text and element 1 as its part-of-speech tag
 * @returns the raw recognizer score divided by 10 (presumably 0-1 for a raw 0-10 scale — confirm against the recognizer)
 */
const findEntityScoreForWord = (word: wordTag): number => {
    const text = word[0] as string
    const pos = word[1] as string
    const rawScore = PlexusEntityRecognizer.entityScore(text, pos)
    return rawScore / 10
}

/**
 * Determines whether a word can take part in an english entity phrase.
 *  - A word that is not the "first word" is rejected when it ends in punctuation.
 *  - Otherwise it is accepted when `wordIsQualified` accepts it against `EntityPartsOfSpeech`
 *    (first word) or `CandidatePartsOfSpeech` (any other word).
 *  - Failing that, it is still accepted when prepositions are allowed and its tag is a preposition.
 * @param word the tagged word to check
 * @param canBePrep whether a preposition is acceptable; defaults to true, but since a required
 *  parameter follows it, callers must pass `undefined` explicitly to get the default
 * @param firstWord whether the word is being treated as the phrase's first word
 */
export const isEntityWord = (
    word: wordTag,
    canBePrep: boolean = true, //default
    firstWord: boolean
): boolean => {
    // Ending punctuation is tolerated on the first word only.
    if (!firstWord && wordHasEndingPunctuation(word[0] as string)) {
        return false
    }
    const acceptedPoses = firstWord ? EntityPartsOfSpeech : CandidatePartsOfSpeech
    if (wordIsQualified(word, acceptedPoses)) {
        return true
    }
    return canBePrep && isPrep(word[1] as string)
}

/**
 * Determines whether a given word meets certain qualifications. A word qualifies when either
 *  - its part of speech matches one of the accepted parts of speech, or
 *  - it has a score of 10 (normalized score >= 0.99).
 * Both the score and the part of speech are always computed before the result is combined.
 * @param word the tagged word to check
 * @param qualifiedPoses accepted part-of-speech prefixes; defaults to `CandidatePartsOfSpeech`
 */
const wordIsQualified = (
    word: wordTag,
    qualifiedPoses: string[] = CandidatePartsOfSpeech
): boolean => {
    const hasTopScore = findEntityScoreForWord(word) >= 0.99
    // Use the word's own tag when present; otherwise ask the recognizer's tagger.
    const tag = word[1] || PlexusEntityRecognizer.winkWordPos(word[0] as string)
    const hasAcceptedPos = posQualifies(tag as string, qualifiedPoses)
    return hasTopScore || hasAcceptedPos
}

/**
 * Determines whether a given part of speech qualifies (matches) relative to a set of acceptable parts of speech.
 * Matching is by prefix: `pos` qualifies when it starts with any accepted entry.
 * @param pos the part-of-speech tag to test
 * @param qualifiedPoses accepted part-of-speech prefixes; defaults to `CandidatePartsOfSpeech`
 * @returns true if `pos` starts with at least one accepted prefix; false otherwise (including for an empty list)
 */
const posQualifies = (pos: string, qualifiedPoses: string[] = CandidatePartsOfSpeech): boolean =>
    // `some` stops at the first match; a `return` inside `forEach` only exits the callback.
    qualifiedPoses.some(acceptedPos => pos.startsWith(acceptedPos))

/**
 * Tells whether a part-of-speech tag is a preposition-like tag, i.e. begins with 'IN'.
 * @param pos the part-of-speech tag to test
 */
const isPrep = (pos: string) => {
    const prepositionPrefix = 'IN'
    return pos.startsWith(prepositionPrefix)
}

/**
 * Determines whether a word is one of the bridge words: 'of', 'my', 'our', 'your'.
 * The word is compared, by exact match, using the `strippedText` that `strip` yields for it.
 * @param word the raw word to test
 */
export const isBridgeWord = (word: string) => {
    const bridgeWords = ['of', 'my', 'our', 'your']
    const {strippedText} = strip(word)
    return bridgeWords.includes(strippedText)
}

/**
 * Determines whether a word's last char is non alphanumeric (for english).
 * Digits count as alphanumeric so that "2K22" and "Covid19" are not flagged.
 * Note: for an empty string the last char is `undefined`, which is handed to `charIsAlphaNumeric` as-is.
 * @param word the word whose final character is inspected
 */
const wordHasEndingPunctuation = (word: string): boolean => {
    const lastChar = word[word.length - 1]
    return !charIsAlphaNumeric(lastChar)
}

// Part-of-speech tag prefixes, matched with `startsWith` (presumably Penn Treebank style tags — confirm against the tagger).
// Accepted for a phrase's first word and for an entity core.
const EntityPartsOfSpeech: string[] = ['NN', 'VBG']
// Accepted for any other word: 'JJ' followed by the entity tags.
const CandidatePartsOfSpeech: string[] = ['JJ'].concat(EntityPartsOfSpeech)

export default {}
