/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.process.DistSimClassifier;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.logging.Redwood;

/**
 * Unknown-word model that maps an English word to a coarse "signature"
 * string (always starting with {@code UNK}) and scores tags against the
 * counts collected for that signature.
 *
 * <p>The signature scheme is selected by the inherited {@code unknownLevel}
 * field, which must lie in {@code [MIN_UNKNOWN, MAX_UNKNOWN]}. Level 0
 * produces the bare {@code "UNK"} signature; levels 1-8 append features such
 * as capitalization, digits, dashes and word endings (see the individual
 * {@code getSignatureN} methods).
 *
 * <p>All signature levels accept an empty word without throwing; an empty
 * word is treated as having no first character (neither upper- nor
 * lower-case, not a letter).
 */
public class EnglishUnknownWordModel extends BaseUnknownWordModel {
    private static final Redwood.RedwoodChannels log = Redwood.channels(EnglishUnknownWordModel.class);
    private static final long serialVersionUID = 4825624957364628770L;
    private static final boolean DEBUG_UWM = false;
    protected final boolean smartMutation;
    protected final int unknownSuffixSize;
    protected final int unknownPrefixSize;
    protected final String wordClassesFile;
    /** Smallest accepted value of {@code unknownLevel}. */
    private static final int MIN_UNKNOWN = 0;
    /** Largest accepted value of {@code unknownLevel}. */
    private static final int MAX_UNKNOWN = 8;
    /**
     * Ordered suffix table used by signature levels 5 and 6. Order matters:
     * the first matching entry wins, so longer/more specific endings such as
     * {@code "ly"} and {@code "ity"} must precede {@code "y"}.
     */
    private static final String[] WORD_ENDINGS = {
        "ed", "ing", "ion", "er", "est", "ly", "ity", "y", "al"
    };
    /** Created on first use by {@link #getSignature8}; not serialized. */
    private transient DistSimClassifier distSim;

    /**
     * Builds the model and copies the lexicon options it needs.
     *
     * @param op parser options; {@code op.lexOptions} supplies the mutation,
     *           prefix/suffix-size and word-classes-file settings
     * @param lex lexicon passed through to the superclass
     * @param wordIndex word index passed through to the superclass
     * @param tagIndex tag index passed through to the superclass
     * @param unSeenCounter counter of unseen-word statistics passed through to the superclass
     * @throws IllegalArgumentException if the inherited {@code unknownLevel}
     *         is outside {@code [MIN_UNKNOWN, MAX_UNKNOWN]}
     */
    public EnglishUnknownWordModel(Options op, Lexicon lex, Index<String> wordIndex, Index<String> tagIndex, ClassicCounter<IntTaggedWord> unSeenCounter) {
        super(op, lex, wordIndex, tagIndex, unSeenCounter, null, null, null);
        if (this.unknownLevel < MIN_UNKNOWN || this.unknownLevel > MAX_UNKNOWN) {
            throw new IllegalArgumentException("Invalid value for useUnknownWordSignatures: " + this.unknownLevel);
        }
        this.smartMutation = op.lexOptions.smartMutation;
        this.unknownSuffixSize = op.lexOptions.unknownSuffixSize;
        this.unknownPrefixSize = op.lexOptions.unknownPrefixSize;
        this.wordClassesFile = op.lexOptions.wordClassesFile;
    }

    /**
     * Convenience constructor that starts with an empty unseen-word counter.
     */
    public EnglishUnknownWordModel(Options op, Lexicon lex, Index<String> wordIndex, Index<String> tagIndex) {
        this(op, lex, wordIndex, tagIndex, new ClassicCounter<IntTaggedWord>());
    }

    /**
     * Computes {@code log( P(tag|signature) * (1/total) / (c_Tseen/total) )}.
     *
     * @return the log score as a float, or {@link Float#NEGATIVE_INFINITY}
     *         when the score is not greater than -100 (this includes NaN)
     */
    @Override
    public float score(IntTaggedWord iTW, int loc, double c_Tseen, double total, double smooth, String word) {
        double pb_T_S = this.scoreProbTagGivenWordSignature(iTW, loc, smooth, word);
        double p_W = 1.0 / total;
        double p_T = c_Tseen / total;
        // Same evaluation order as before: (pb_T_S * p_W) / p_T.
        double pb_W_T = Math.log(pb_T_S * p_W / p_T);
        if (pb_W_T > -100.0) {
            return (float)pb_W_T;
        }
        return Float.NEGATIVE_INFINITY;
    }

    /**
     * Returns the smoothed estimate
     * {@code (c(tag,sig) + smooth * c(tag)/c(unknown)) / (c(sig) + smooth)}
     * using counts from the unseen-word counter. At unknown level 0 the
     * signature-specific counts are ignored, so the result reduces to
     * {@code c(tag)/c(unknown)}.
     */
    @Override
    public double scoreProbTagGivenWordSignature(IntTaggedWord iTW, int loc, double smooth, String word) {
        // Note: looking up the signature may add it to the word index.
        int wordSig = this.getSignatureIndex(iTW.word, loc, word);
        double c_TS = this.unSeenCounter.getCount(new IntTaggedWord(wordSig, iTW.tag));
        double c_S = this.unSeenCounter.getCount(new IntTaggedWord(wordSig, -1));
        double c_U = this.unSeenCounter.getCount(NULL_ITW);
        double c_T = this.unSeenCounter.getCount(new IntTaggedWord(-1, iTW.tag));
        double p_T_U = c_T / c_U;
        if (this.unknownLevel == 0) {
            c_TS = 0.0;
            c_S = 0.0;
        }
        return (c_TS + smooth * p_T_U) / (c_S + smooth);
    }

    /**
     * Returns the word-index entry for the signature of {@code word},
     * adding the signature to the index if it is not there yet. The
     * {@code index} argument is not used.
     */
    @Override
    public int getSignatureIndex(int index, int sentencePosition, String word) {
        String uwSig = this.getSignature(word, sentencePosition);
        return this.wordIndex.addToIndex(uwSig);
    }

    /**
     * Builds the signature for {@code word} according to the configured
     * unknown level. The result always starts with {@code "UNK"}; level 0
     * (or any level without a case below) returns exactly {@code "UNK"}.
     *
     * @param word the word to classify
     * @param loc position of the word in the sentence; position 0 is treated
     *            as sentence-initial by the capitalization features
     */
    @Override
    public String getSignature(String word, int loc) {
        StringBuilder sb = new StringBuilder("UNK");
        switch (this.unknownLevel) {
            case 8:
                this.getSignature8(word, sb);
                break;
            case 7:
                getSignature7(word, loc, sb);
                break;
            case 6:
                this.getSignature6(word, loc, sb);
                break;
            case 5:
                this.getSignature5(word, loc, sb);
                break;
            case 4:
                getSignature4(word, loc, sb);
                break;
            case 3:
                getSignature3(word, loc, sb);
                break;
            case 2:
                getSignature2(word, loc, sb);
                break;
            case 1:
                getSignature1(word, loc, sb);
                break;
            default:
                // Level 0: bare "UNK".
                break;
        }
        return sb.toString();
    }

    /** True when {@code word} is non-empty and its first char is upper- or title-case. */
    private static boolean startsWithUpperOrTitle(String word) {
        if (word.isEmpty()) {
            return false;
        }
        char first = word.charAt(0);
        return Character.isUpperCase(first) || Character.isTitleCase(first);
    }

    /**
     * Appends the word-ending feature shared by signature levels 5 and 6:
     * {@code "-s"} for words of length >= 3 ending in "s" (unless preceded by
     * 's', 'i' or 'u'), otherwise the first matching entry of
     * {@link #WORD_ENDINGS} for words of length >= 5 that have no dash and
     * are not a digit/capital mix.
     */
    private static void appendWordEnding(String word, String lowered, boolean hasDash, boolean hasDigit, int numCaps, StringBuilder sb) {
        int wlen = word.length();
        if (lowered.endsWith("s") && wlen >= 3) {
            char beforeLast = lowered.charAt(wlen - 2);
            if (beforeLast != 's' && beforeLast != 'i' && beforeLast != 'u') {
                sb.append("-s");
            }
            return;
        }
        boolean digitCapMix = hasDigit && numCaps > 0;
        if (wlen < 5 || hasDash || digitCapMix) {
            return;
        }
        for (String ending : WORD_ENDINGS) {
            if (lowered.endsWith(ending)) {
                sb.append('-').append(ending);
                return;
            }
        }
    }

    /**
     * Level 7: like level 2, but capitalization is triggered by an upper-case
     * letter anywhere in the word rather than only at the first position.
     */
    private static void getSignature7(String word, int loc, StringBuilder sb) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLower = false;
        boolean hasUpper = false;
        boolean hasDash = false;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else {
                hasNonDigit = true;
                if (Character.isLetter(ch)) {
                    // Title-case letters count as lower-case here.
                    if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
                        hasLower = true;
                    } else {
                        hasUpper = true;
                    }
                } else if (ch == '-') {
                    hasDash = true;
                }
            }
        }
        // hasUpper implies the word is non-empty.
        if (hasUpper) {
            if (!hasLower) {
                sb.append("-ALLC");
            } else if (loc == 0) {
                sb.append("-INIT");
            } else {
                sb.append("-UC");
            }
        } else if (hasLower) {
            sb.append("-LC");
        }
        if (hasDash) {
            sb.append("-DASH");
        }
        if (hasDigit) {
            sb.append(hasNonDigit ? "-DIG" : "-NUM");
        } else if (wlen > 3) {
            // Last character, lower-cased, appended without a separating dash.
            sb.append(Character.toLowerCase(word.charAt(wlen - 1)));
        }
    }

    /**
     * Level 6: capitalization by count of capitals ({@code -CAPS},
     * {@code -INITC[-KNOWNLC]}, {@code -CAP}, {@code -LC}), then
     * {@code -NUM}, {@code -DASH} and a word-ending feature.
     */
    private void getSignature6(String word, int loc, StringBuilder sb) {
        int wlen = word.length();
        int numCaps = 0;
        boolean hasDigit = false;
        boolean hasDash = false;
        boolean hasLower = false;
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else if (ch == '-') {
                hasDash = true;
            } else if (Character.isLetter(ch)) {
                if (Character.isLowerCase(ch)) {
                    hasLower = true;
                } else {
                    // Title-case letters count as both a capital and a lower-case letter.
                    if (Character.isTitleCase(ch)) {
                        hasLower = true;
                    }
                    ++numCaps;
                }
            }
        }
        String lowered = word.toLowerCase();
        if (numCaps > 1) {
            sb.append("-CAPS");
        } else if (numCaps > 0) {
            if (loc == 0) {
                sb.append("-INITC");
                if (this.getLexicon().isKnown(lowered)) {
                    sb.append("-KNOWNLC");
                }
            } else {
                sb.append("-CAP");
            }
        } else if (hasLower) {
            sb.append("-LC");
        }
        if (hasDigit) {
            sb.append("-NUM");
        }
        if (hasDash) {
            sb.append("-DASH");
        }
        appendWordEnding(word, lowered, hasDash, hasDigit, numCaps, sb);
    }

    /**
     * Level 5: like level 6 except that capitalization is decided from the
     * first character: a sentence-initial word with exactly one capital gets
     * {@code -INITC[-KNOWNLC]}, any other word starting with a capital (or a
     * word starting with a non-letter that contains capitals) gets
     * {@code -CAPS}.
     */
    private void getSignature5(String word, int loc, StringBuilder sb) {
        int wlen = word.length();
        int numCaps = 0;
        boolean hasDigit = false;
        boolean hasDash = false;
        boolean hasLower = false;
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else if (ch == '-') {
                hasDash = true;
            } else if (Character.isLetter(ch)) {
                if (Character.isLowerCase(ch)) {
                    hasLower = true;
                } else {
                    // Title-case letters count as both a capital and a lower-case letter.
                    if (Character.isTitleCase(ch)) {
                        hasLower = true;
                    }
                    ++numCaps;
                }
            }
        }
        // Guarded so that an empty word does not throw on charAt(0).
        boolean startsUpper = startsWithUpperOrTitle(word);
        boolean startsWithLetter = wlen > 0 && Character.isLetter(word.charAt(0));
        String lowered = word.toLowerCase();
        if (startsUpper) {
            if (loc == 0 && numCaps == 1) {
                sb.append("-INITC");
                if (this.getLexicon().isKnown(lowered)) {
                    sb.append("-KNOWNLC");
                }
            } else {
                sb.append("-CAPS");
            }
        } else if (!startsWithLetter && numCaps > 0) {
            sb.append("-CAPS");
        } else if (hasLower) {
            sb.append("-LC");
        }
        if (hasDigit) {
            sb.append("-NUM");
        }
        if (hasDash) {
            sb.append("-DASH");
        }
        appendWordEnding(word, lowered, hasDash, hasDigit, numCaps, sb);
    }

    /**
     * Level 4: compact single-letter features for case ({@code -AC},
     * {@code -SC}, {@code -C}, {@code -L}, {@code -U}, {@code -S}), digits
     * ({@code -N} all digits, {@code -n} some digits), dash ({@code -H}),
     * period ({@code -P}), comma ({@code -C}) and, for words longer than
     * three characters ending in a letter, the lower-cased last letter.
     */
    private static void getSignature4(String word, int loc, StringBuilder sb) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLetter = false;
        boolean hasLower = false;
        boolean hasDash = false;
        boolean hasPeriod = false;
        boolean hasComma = false;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
                continue;
            }
            hasNonDigit = true;
            if (Character.isLetter(ch)) {
                hasLetter = true;
                if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
                    hasLower = true;
                }
            } else if (ch == '-') {
                hasDash = true;
            } else if (ch == '.') {
                hasPeriod = true;
            } else if (ch == ',') {
                hasComma = true;
            }
        }
        // Guarded so that an empty word does not throw on charAt(0).
        if (startsWithUpperOrTitle(word)) {
            if (!hasLower) {
                sb.append("-AC");
            } else if (loc == 0) {
                sb.append("-SC");
            } else {
                sb.append("-C");
            }
        } else if (hasLower) {
            sb.append("-L");
        } else if (hasLetter) {
            sb.append("-U");
        } else {
            sb.append("-S");
        }
        if (hasDigit && !hasNonDigit) {
            sb.append("-N");
        } else if (hasDigit) {
            sb.append("-n");
        }
        if (hasDash) {
            sb.append("-H");
        }
        if (hasPeriod) {
            sb.append("-P");
        }
        if (hasComma) {
            // Same token as the non-initial capitalized case above; kept as is.
            sb.append("-C");
        }
        if (wlen > 3) {
            char last = word.charAt(wlen - 1);
            if (Character.isLetter(last)) {
                sb.append('-');
                sb.append(Character.toLowerCase(last));
            }
        }
    }

    /**
     * Level 3: run-length style character-class string. Each run of
     * same-class characters contributes its class letter, followed by a
     * single {@code '+'} if the run has more than one character. Classes:
     * {@code S}/{@code L} upper or title case (sentence-initial / elsewhere),
     * {@code l} other letter, {@code d} digit, {@code h} dash, {@code p}
     * period, {@code s} anything else. Words longer than three characters
     * also get {@code '-'} plus the lower-cased last character.
     */
    private static void getSignature3(String word, int loc, StringBuilder sb) {
        sb.append('-');
        // '-' is never produced as a class letter, so the first char always starts a run.
        char lastClass = '-';
        int runLength = 0;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            char newClass;
            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                newClass = loc == 0 ? 'S' : 'L';
            } else if (Character.isLetter(ch)) {
                newClass = 'l';
            } else if (Character.isDigit(ch)) {
                newClass = 'd';
            } else if (ch == '-') {
                newClass = 'h';
            } else if (ch == '.') {
                newClass = 'p';
            } else {
                newClass = 's';
            }
            if (newClass != lastClass) {
                lastClass = newClass;
                sb.append(lastClass);
                runLength = 1;
            } else {
                // Mark a repeated class once, on the second character of the run.
                if (runLength < 2) {
                    sb.append('+');
                }
                ++runLength;
            }
        }
        if (wlen > 3) {
            sb.append('-');
            sb.append(Character.toLowerCase(word.charAt(wlen - 1)));
        }
    }

    /**
     * Level 2: capitalization from the first character ({@code -ALLC},
     * {@code -INIT}, {@code -UC}, {@code -LC}), {@code -DASH}, then either a
     * digit feature ({@code -NUM} all digits, {@code -DIG} some digits) or,
     * for words longer than three characters, the lower-cased last character.
     */
    private static void getSignature2(String word, int loc, StringBuilder sb) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLower = false;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else {
                hasNonDigit = true;
                if (Character.isLetter(ch) && (Character.isLowerCase(ch) || Character.isTitleCase(ch))) {
                    hasLower = true;
                }
            }
        }
        if (startsWithUpperOrTitle(word)) {
            if (!hasLower) {
                sb.append("-ALLC");
            } else if (loc == 0) {
                sb.append("-INIT");
            } else {
                sb.append("-UC");
            }
        } else if (hasLower) {
            sb.append("-LC");
        }
        if (word.indexOf('-') >= 0) {
            sb.append("-DASH");
        }
        if (hasDigit) {
            sb.append(hasNonDigit ? "-DIG" : "-NUM");
        } else if (wlen > 3) {
            // Last character, lower-cased, appended without a separating dash.
            sb.append(Character.toLowerCase(word.charAt(wlen - 1)));
        }
    }

    /**
     * Level 1: {@code -<last two chars>-<CASE>} where CASE is
     * {@code LOWER}, {@code INIT} (upper-case and sentence-initial),
     * {@code UPPER}, or {@code OTHER}. An empty word yields {@code --OTHER}.
     */
    private static void getSignature1(String word, int loc, StringBuilder sb) {
        int wlen = word.length();
        sb.append('-');
        sb.append(word.substring(Math.max(wlen - 2, 0), wlen));
        sb.append('-');
        if (wlen == 0) {
            // No first character to inspect; previously this threw on charAt(0).
            sb.append("OTHER");
            return;
        }
        char first = word.charAt(0);
        if (Character.isLowerCase(first)) {
            sb.append("LOWER");
        } else if (Character.isUpperCase(first)) {
            sb.append(loc == 0 ? "INIT" : "UPPER");
        } else {
            sb.append("OTHER");
        }
    }

    /**
     * Level 8: {@code -NUMBER} when every character is a digit, '.', ',' or a
     * leading '-'/'+' (an empty word also qualifies); otherwise {@code '-'}
     * followed by the class returned by the {@link DistSimClassifier} for the
     * word, or {@code "NULL"} when the classifier returns {@code null}. The
     * classifier is created from {@code wordClassesFile} on first use.
     */
    private void getSignature8(String word, StringBuilder sb) {
        sb.append('-');
        boolean numeric = true;
        for (int i = 0; i < word.length(); ++i) {
            char c = word.charAt(i);
            boolean leadingSign = i == 0 && (c == '-' || c == '+');
            if (!(Character.isDigit(c) || c == '.' || c == ',' || leadingSign)) {
                numeric = false;
                break;
            }
        }
        if (numeric) {
            sb.append("NUMBER");
            return;
        }
        if (this.distSim == null) {
            this.distSim = new DistSimClassifier(this.wordClassesFile, false, true);
        }
        String cluster = this.distSim.distSimClass(word);
        sb.append(cluster == null ? "NULL" : cluster);
    }
}

