import {nonCommonWordsTrie, commonWordsTrie, subtlexTrie} from './WordFreqTries'

import {stripNoteText} from '../TextNormalization/TextNormalization'

/**
 * Looks up the occurrence count stored for a word in `subtlexTrie`.
 *
 * The word is first normalized with `stripNoteText`. The normalized form is
 * tried first; if it has no (truthy) count, the same text with its first
 * character upper-cased is tried, so capitalized entries can still match.
 *
 * @param word - The word to look up.
 * @returns `-1` when `word` is empty/missing (legacy sentinel kept for
 *   existing callers), `undefined` when the word normalizes to nothing or is
 *   not present in the trie, otherwise the stored count.
 */
export const subtlexCount = (word: string): number | undefined => {
    // Legacy sentinel: a falsy input has always produced -1.
    if (!word) return -1

    // presumably stripNoteText also lower-cases the text — confirm against its definition
    const lowerInput = stripNoteText(word)
    // Normalization may strip every character; bail out rather than indexing
    // into an empty string (which used to throw a TypeError).
    if (!lowerInput) return undefined

    const upperInput = lowerInput.charAt(0).toUpperCase() + lowerInput.slice(1)

    const lowerCount = subtlexTrie.get(lowerInput)
    const upperCount = subtlexTrie.get(upperInput)
    const count = lowerCount ? lowerCount : upperCount

    // Collapse both null and undefined into a single "not found" result.
    return count == null ? undefined : count
}
/**
 * Converts a word's count from `subtlexCount` into a relative frequency by
 * dividing it by a fixed total.
 *
 * @param word - The word to look up.
 * @returns The count divided by the total, or `undefined` when no usable
 *   count exists (word not found, or the `-1` "empty word" sentinel).
 */
export const subtlexFreq = (word: string): number | undefined => {
    // presumably the total token count of the corpus — confirm against the data source
    const probTot = 49719560
    const count = subtlexCount(word)
    // No count, or the negative sentinel: there is no meaningful frequency,
    // so avoid returning NaN or a negative number.
    if (count == null || count < 0) return undefined
    return count / probTot
}

/**
 * Scores how rare a word is in English, from roughly 0 (e.g. "the") up to
 * 10 (e.g. "zymurgy"), using the log10 of its count from `subtlexCount`.
 *
 * The score is `(empiricalLogMax - log10(count)) * (10 / empiricalLogMax)`,
 * so a count of 1 maps to 10 and a count of 10^6.2 maps to 0. Counts above
 * 10^6.2 would produce a slightly negative score; the result is not clamped.
 *
 * NOTE(review): an earlier comment suggested "20 or thirty" as a good cutoff,
 * which lies outside the 0-10 range produced here — presumably left over from
 * a different scale; confirm the intended cutoff.
 *
 * @param word - The word to score.
 * @returns The rarity score; the maximum (10) when the word has no positive
 *   count (unknown word, empty word, or zero count).
 */
export const englishRarityScore = (word: string): number => {
    const desiredScoreMax = 10
    const empiricalLogMax = 6.2
    const rawCount = subtlexCount(word)
    // Treat missing, zero, negative (the -1 sentinel) and NaN counts as
    // "maximally rare" instead of feeding them to log10, which would yield NaN.
    if (rawCount == null || !(rawCount > 0)) return desiredScoreMax
    return (empiricalLogMax - Math.log10(rawCount)) * (desiredScoreMax / empiricalLogMax)
}
