import {
    anyCharIsAlphanumeric,
    charIsAlphaNumeric,
    charIsEnglishLetter,
    hackOffContractionEnds,
    isAlphaNumeric,
    removeEndContraction,
    splitNote,
    strip,
    stripTextPerWord,
    isCharacterALetter,
} from '../TextNormalization/TextNormalization'

/**
 * Text preprocessing for part-of-speech (POS) tagging, together with the
 * string helpers it is built from.
 */

/**
 * Rewrites a sentence into the form handed to the POS tagger.
 *
 * Steps, in order:
 *  1. pass the sentence through `hackOffContractionEnds`;
 *  2. delete every round, square and curly bracket;
 *  3. replace each non-English letter with the marker `MUL`;
 *  4. replace punctuation that sits inside a word with the marker `PUNCTUATION`;
 *  5. spell out each digit as an upper-case word (`0` -> `ZERO`, ... `9` -> `NINE`);
 *  6. run the first word of every sentence through `decapStartingWords`.
 *
 * @param sentence - Raw sentence text.
 * @returns The normalised text.
 */
export const preprocessForPosTagging = (sentence: string): string => {
    const bracketChars = '[](){}'.split('').map(char => ({char}))
    const digitWords = [
        'ZERO',
        'ONE',
        'TWO',
        'THREE',
        'FOUR',
        'FIVE',
        'SIX',
        'SEVEN',
        'EIGHT',
        'NINE',
    ].map((replacement, digit) => ({char: String(digit), replacement}))

    const contractionsTrimmed = hackOffContractionEnds(sentence)
    const bracketsRemoved = replaceAllChars(bracketChars, contractionsTrimmed)

    // The POS splitter separates English from non-English text, so foreign letters
    // are turned into an English-letter marker first.
    const foreignLettersMarked = replaceAllCharsWith(bracketsRemoved, nonEnglishLetter, 'MUL')

    // Only word-internal punctuation is touched: a comma that merely ends a word
    // must not disturb that word's POS evaluation.
    const punctuationMarked = removeNonAlphanumericChars(foreignLettersMarked, true)

    const digitsSpelledOut = replaceAllChars(digitWords, punctuationMarked)

    // Finally, decapitalise the words that open each sentence.
    return decapStartingWords(digitsSpelledOut)
}

/**
 * Runs the first word of every sentence through `strip`, leaving the rest untouched.
 *
 * Sentences are delimited by `. `, `? ` and `! ` (terminator followed by a space).
 * The text is processed once per terminator, in that order; each pass splits on the
 * terminator, rewrites the leading word of every piece and joins the pieces back with
 * the same terminator, so the punctuation itself is preserved.
 *
 * NOTE(review): the decapitalisation is presumably what the fourth `strip` argument
 * (`true`) turns on - confirm against `strip` in TextNormalization.
 *
 * @param text - Text that may contain several sentences.
 * @returns The text with each sentence's first word replaced by its stripped form.
 */
const decapStartingWords = (text: string) => {
    const sentenceEndings = ['. ', '? ', '! ']

    // `split(' ')` always yields at least one element, so there is always a leading
    // word to process (it may be the empty string).
    const stripLeadingWord = (sentence: string): string => {
        const [leadingWord, ...remainingWords] = sentence.split(' ')
        const {strippedText} = strip(leadingWord, false, false, true, false)
        return [strippedText, ...remainingWords].join(' ')
    }

    return sentenceEndings.reduce(
        (current, ending) => current.split(ending).map(stripLeadingWord).join(ending),
        text
    )
}
/**
 * Replaces punctuation-like characters with the literal marker `PUNCTUATION`.
 *
 * Despite the name nothing is deleted: a character qualifies when it is not a space
 * and `charIsAlphaNumeric` rejects it, and each qualifying character is substituted
 * by the marker text.
 *
 * The input is scanned exactly once. The inserted marker is never re-examined, so
 * the result does not depend on how `charIsAlphaNumeric` classifies the marker's
 * own letters and the scan always terminates.
 *
 * @param text - Text to scan.
 * @param onlyRemoveInternal - When true, a qualifying character is replaced only if
 *     `insideOfWord` reports a non-space neighbour on both sides; punctuation at the
 *     start or end of a word is kept. When false, every qualifying character is
 *     replaced.
 * @returns The text with the selected characters replaced by `PUNCTUATION`.
 */
const removeNonAlphanumericChars = (
    text: string,
    onlyRemoveInternal: boolean = false
): string => {
    const marker = 'PUNCTUATION'
    const pieces: string[] = []
    for (let i = 0; i < text.length; i++) {
        const current = text.charAt(i)
        const isPunctuation =
            !charIsAlphaNumeric(current, false, false, false) && current !== ' '
        // Neighbours are inspected on the untouched input. The marker is non-empty and
        // contains no space, so a replaced neighbour counts as "not a word boundary"
        // exactly as the original character did.
        if (isPunctuation && (!onlyRemoveInternal || insideOfWord(i, text))) {
            pieces.push(marker)
        } else {
            pieces.push(current)
        }
    }
    return pieces.join('')
}

/**
 * Predicate for letters outside the English alphabet: the character must be rejected
 * by `charIsEnglishLetter` and accepted by `isCharacterALetter`.
 */
const nonEnglishLetter = (char: string) => {
    if (charIsEnglishLetter(char)) {
        return false
    }
    return isCharacterALetter(char)
}
/**
 * Replaces every character of `text` that satisfies `condition` with `replaceValue`.
 *
 * The text is walked once, one UTF-16 code unit at a time, and the predicate is only
 * ever applied to characters of the input. The substituted text is not re-examined,
 * so the call terminates even when `replaceValue` itself contains characters that
 * satisfy `condition`.
 *
 * @param text - Text to scan.
 * @param condition - Predicate called with each single-character string.
 * @param replaceValue - Text substituted for each matching character; may be empty
 *     to delete matches. Defaults to `'MUL'`.
 * @returns The text with all matching characters replaced.
 */
const replaceAllCharsWith = (
    text: string,
    condition: (char: string) => boolean,
    replaceValue: string = 'MUL'
) => {
    return text
        .split('')
        .map(current => (condition(current) ? replaceValue : current))
        .join('')
}

/**
 * Tells whether position `i` lies strictly inside a word of `text`, i.e. the
 * characters immediately before and after it both exist and neither is a space.
 *
 * @param i - Index of the character being examined.
 * @param text - Text containing the character.
 * @returns `true` when both neighbours are non-space characters.
 */
const insideOfWord = (i: number, text: string): boolean => {
    // A missing neighbour (start or end of the text) counts as a word boundary.
    const isWordBound = (neighbour: string | undefined): boolean =>
        !neighbour || neighbour === ' '
    return !(isWordBound(text[i - 1]) || isWordBound(text[i + 1]))
}

/**
 * Applies a list of character substitutions to `text`, one after another.
 *
 * Entries are processed in array order and each substitution sees the output of the
 * previous one.
 *
 * @param chars - Substitutions to apply. `char` is the text to look for;
 *     `replacement` is what to put in its place. An entry without `replacement`
 *     uses `defaultReplacement`; an explicit empty string is honoured and deletes
 *     the character.
 * @param text - Text to transform.
 * @param defaultReplacement - Replacement for entries that specify none. Defaults
 *     to the empty string, i.e. deletion.
 * @returns The transformed text.
 */
const replaceAllChars = (
    chars: {char: string; replacement?: string}[],
    text: string,
    defaultReplacement: string = ''
) => {
    return chars.reduce(
        (current, {char, replacement}) =>
            replaceAllInternal(
                char,
                current,
                // Only a missing replacement falls back to the default; '' is a valid choice.
                replacement !== undefined ? replacement : defaultReplacement
            ),
        text
    )
}
/**
 * Replaces every occurrence of `char` in `text` with `replacement`.
 *
 * The substitution is literal and single-pass: `replacement` is inserted verbatim
 * (sequences such as `$&` have no special meaning) and the inserted text is not
 * searched again, so the call terminates even when `replacement` contains `char`.
 *
 * @param char - Text to look for; callers in this file pass a single character.
 *     An empty string matches nothing and leaves `text` unchanged.
 * @param text - Text to transform.
 * @param replacement - Text to insert for each occurrence. Defaults to the empty
 *     string, i.e. deletion.
 * @returns The transformed text.
 */
const replaceAllInternal = (char: string, text: string, replacement: string = ''): string => {
    if (char === '') {
        return text
    }
    return text.split(char).join(replacement)
}
