/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process;

import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.trees.international.pennchinese.ChineseUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Timing;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WordShapeClassifier {
    // ---------------------------------------------------------------------
    // Shaper ids. These are the values accepted by wordShape(String, int[, Collection])
    // and returned by lookupShaper(String).
    // ---------------------------------------------------------------------

    /** No shaping: wordShape returns its input string unchanged. */
    public static final int NOWORDSHAPE = -1;
    /** Dispatches to wordShapeDan1. */
    public static final int WORDSHAPEDAN1 = 0;
    /** Dispatches to wordShapeChris1. */
    public static final int WORDSHAPECHRIS1 = 1;
    /** Dispatches to wordShapeDan2; the known-lower-case-word collection is ignored. */
    public static final int WORDSHAPEDAN2 = 2;
    /** Dispatches to wordShapeDan2 and passes the known-lower-case-word collection through. */
    public static final int WORDSHAPEDAN2USELC = 3;
    /** Dispatches to wordShapeDan2Bio; the known-lower-case-word collection is ignored. */
    public static final int WORDSHAPEDAN2BIO = 4;
    /** Dispatches to wordShapeDan2Bio and passes the known-lower-case-word collection through. */
    public static final int WORDSHAPEDAN2BIOUSELC = 5;
    /** Dispatches to wordShapeJenny1; the known-lower-case-word collection is ignored. */
    public static final int WORDSHAPEJENNY1 = 6;
    /** Dispatches to wordShapeJenny1 and passes the known-lower-case-word collection through. */
    public static final int WORDSHAPEJENNY1USELC = 7;
    /** Dispatches to wordShapeChris2 with omitIfInBoundary = false; the collection is ignored. */
    public static final int WORDSHAPECHRIS2 = 8;
    /** Dispatches to wordShapeChris2 with omitIfInBoundary = false and passes the collection through. */
    public static final int WORDSHAPECHRIS2USELC = 9;
    /** Dispatches to wordShapeChris2 with omitIfInBoundary = true; the collection is ignored. */
    public static final int WORDSHAPECHRIS3 = 10;
    /** Dispatches to wordShapeChris2 with omitIfInBoundary = true and passes the collection through. */
    public static final int WORDSHAPECHRIS3USELC = 11;
    /** Dispatches to wordShapeChris4 with omitIfInBoundary = false and passes the collection through. */
    public static final int WORDSHAPECHRIS4 = 12;
    /** Dispatches to wordShapeDigits. */
    public static final int WORDSHAPEDIGITS = 13;
    /** Dispatches to wordShapeChinese. */
    public static final int WORDSHAPECHINESE = 14;
    /** Dispatches to wordShapeCluster1. */
    public static final int WORDSHAPECLUSTER1 = 15;
    // Not referenced anywhere in this file: the Chris2/Chris4 "long" shapers use the
    // literal 2 for the width of their begin/end windows.
    // NOTE(review): presumably the compiler inlined this constant there - confirm.
    private static final int BOUNDARY_SIZE = 2;
    // Lower-case Greek letter names matched with String.startsWith (case-sensitive) by the
    // Jenny1, Chris2 and Chris4 shapers; a match is emitted as the single class 'g'.
    private static final String[] greek = new String[]{"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "theta", "iota", "kappa", "lambda", "omicron", "rho", "sigma", "tau", "upsilon", "omega"};
    // Same letter names as one alternation; flag value 2 is Pattern.CASE_INSENSITIVE.
    // Used by containsGreekLetter via Matcher.find(), i.e. matches anywhere in the string.
    private static final Pattern biogreek = Pattern.compile("alpha|beta|gamma|delta|epsilon|zeta|theta|iota|kappa|lambda|omicron|rho|sigma|tau|upsilon|omega", 2);

    /** Private constructor: this class only exposes static members and is never instantiated here. */
    private WordShapeClassifier() {
    }

    /**
     * Translates a shaper name into the integer id understood by {@code wordShape}.
     * The comparison ignores case.
     *
     * @param name shaper name such as {@code "dan1"} or {@code "chris2useLC"}; may be {@code null}
     * @return the shaper id (0 to 15), or -1 when {@code name} is {@code null} or not recognised
     */
    public static int lookupShaper(String name) {
        if (name == null) {
            return -1;
        }
        // The position of a name in this table IS its shaper id (0 = dan1 ... 15 = cluster1).
        final String[] shaperNames = {
            "dan1", "chris1", "dan2", "dan2useLC", "dan2bio", "dan2bioUseLC",
            "jenny1", "jenny1useLC", "chris2", "chris2useLC", "chris3", "chris3useLC",
            "chris4", "digits", "chinese", "cluster1"
        };
        for (int id = 0; id < shaperNames.length; id++) {
            if (shaperNames[id].equalsIgnoreCase(name)) {
                return id;
            }
        }
        return -1;
    }

    /**
     * Tells whether a shaper id is one of the variants that must not consult the
     * known-lower-case-word collection (ids 2, 4, 6, 8 and 10).
     *
     * @param shape shaper id
     * @return {@code true} for ids 2, 4, 6, 8 and 10; {@code false} for everything else
     */
    private static boolean dontUseLC(int shape) {
        switch (shape) {
            case 2:
            case 4:
            case 6:
            case 8:
            case 10:
                return true;
            default:
                return false;
        }
    }

    /**
     * Computes the shape of a string with the given shaper and no known-lower-case-word
     * collection; equivalent to calling the three-argument overload with {@code null}.
     *
     * @param inStr the string to shape
     * @param wordShaper shaper id (one of the integer constants declared on this class)
     * @return the shape string produced by the selected shaper
     * @throws IllegalStateException if {@code wordShaper} is not a recognised id
     */
    public static String wordShape(String inStr, int wordShaper) {
        return WordShapeClassifier.wordShape(inStr, wordShaper, null);
    }

    /**
     * Computes the shape of a string by dispatching to the shaper selected by {@code wordShaper}.
     *
     * @param inStr the string to shape
     * @param wordShaper shaper id (-1 to 15); -1 returns {@code inStr} unchanged
     * @param knownLCWords lower-cased words that some shapers use to append a {@code 'k'}
     *        marker; may be {@code null}. It is discarded for ids 2, 4, 6, 8 and 10.
     * @return the shape string produced by the selected shaper
     * @throws IllegalStateException if {@code wordShaper} is not a recognised id
     */
    public static String wordShape(String inStr, int wordShaper, Collection<String> knownLCWords) {
        // The non-"UseLC" ids share an implementation with their "UseLC" twin; the only
        // difference is that they never get to see the lexicon.
        final Collection<String> lcWords = WordShapeClassifier.dontUseLC(wordShaper) ? null : knownLCWords;
        switch (wordShaper) {
            case -1:
                return inStr;
            case 0:
                return WordShapeClassifier.wordShapeDan1(inStr);
            case 1:
                return WordShapeClassifier.wordShapeChris1(inStr);
            case 2:
            case 3:
                return WordShapeClassifier.wordShapeDan2(inStr, lcWords);
            case 4:
            case 5:
                return WordShapeClassifier.wordShapeDan2Bio(inStr, lcWords);
            case 6:
            case 7:
                return WordShapeClassifier.wordShapeJenny1(inStr, lcWords);
            case 8:
            case 9:
                return WordShapeClassifier.wordShapeChris2(inStr, false, lcWords);
            case 10:
            case 11:
                return WordShapeClassifier.wordShapeChris2(inStr, true, lcWords);
            case 12:
                return WordShapeClassifier.wordShapeChris4(inStr, false, lcWords);
            case 13:
                return WordShapeClassifier.wordShapeDigits(inStr);
            case 14:
                return WordShapeClassifier.wordShapeChinese(inStr);
            case 15:
                return WordShapeClassifier.wordShapeCluster1(inStr);
            default:
                throw new IllegalStateException("Bad WordShapeClassifier");
        }
    }

    /**
     * Coarse five-way shape: {@code ALL-DIGITS}, {@code ALL-UPPER}, {@code ALL-LOWER},
     * {@code MIXED-CASE} (first character upper case, every later character lower case)
     * or {@code OTHER}. The categories are tested in that order, so the empty string,
     * for which every "all ..." test holds vacuously, yields {@code ALL-DIGITS}.
     *
     * @param s the string to shape
     * @return one of the five labels above
     */
    private static String wordShapeDan1(String s) {
        boolean allDigits = true;
        boolean allUpper = true;
        boolean allLower = true;
        boolean capitalized = true;
        final int n = s.length();
        for (int pos = 0; pos < n; pos++) {
            final char ch = s.charAt(pos);
            final boolean isUpper = Character.isUpperCase(ch);
            final boolean isLower = Character.isLowerCase(ch);
            allDigits &= Character.isDigit(ch);
            allUpper &= isUpper;
            allLower &= isLower;
            // Capitalised means: upper case in position 0, lower case everywhere else.
            capitalized &= (pos == 0) ? isUpper : isLower;
        }
        if (allDigits) {
            return "ALL-DIGITS";
        }
        if (allUpper) {
            return "ALL-UPPER";
        }
        if (allLower) {
            return "ALL-LOWER";
        }
        return capitalized ? "MIXED-CASE" : "OTHER";
    }

    /**
     * Collapsed character-class shape prefixed with {@code "WT-"}.
     * Each character is mapped to {@code 'd'} (digit), {@code 'x'} (lower case or underscore),
     * {@code 'X'} (upper case) or itself, and runs of the same class are emitted once.
     * Strings of length three or less get {@code ":" + length} appended. A trailing
     * {@code 'k'} is added when the string consists only of 'x'/'X' classes and its
     * lower-cased form is in {@code knownLCWords}.
     *
     * @param s the string to shape
     * @param knownLCWords lower-cased known words, or {@code null} to skip the 'k' marker
     * @return the shape string
     */
    private static String wordShapeDan2(String s, Collection<String> knownLCWords) {
        final int n = s.length();
        final StringBuilder shape = new StringBuilder("WT-");
        boolean lettersOnly = true;
        // '~' is the initial "previous class", so a leading literal '~' is not emitted.
        char previous = '~';
        for (int pos = 0; pos < n; pos++) {
            final char ch = s.charAt(pos);
            final char cls;
            if (Character.isDigit(ch)) {
                cls = 'd';
            } else if (Character.isLowerCase(ch) || ch == '_') {
                cls = 'x';
            } else if (Character.isUpperCase(ch)) {
                cls = 'X';
            } else {
                cls = ch;
            }
            lettersOnly &= (cls == 'x' || cls == 'X');
            if (cls != previous) {
                shape.append(cls);
            }
            previous = cls;
        }
        if (n <= 3) {
            shape.append(':').append(n);
        }
        if (knownLCWords != null && lettersOnly && knownLCWords.contains(s.toLowerCase())) {
            shape.append('k');
        }
        return shape.toString();
    }

    /**
     * Collapsed character-class shape prefixed with {@code "WT-"}, with Greek letter names.
     * Characters map to {@code 'd'} (digit), {@code 'x'} (lower case), {@code 'X'} (upper case)
     * or themselves; a Greek letter name from {@code greek} starting at the current position
     * is consumed whole and mapped to {@code 'g'}. Runs of the same class are emitted once.
     * Strings of length three or less get {@code ":" + length} appended, and a trailing
     * {@code 'k'} is added when only 'x'/'X' classes were seen and the lower-cased string
     * is in {@code knownLCWords}.
     *
     * @param s the string to shape
     * @param knownLCWords lower-cased known words, or {@code null} to skip the 'k' marker
     * @return the shape string
     */
    private static String wordShapeJenny1(String s, Collection<String> knownLCWords) {
        final int n = s.length();
        final StringBuilder shape = new StringBuilder("WT-");
        boolean lettersOnly = true;
        char previous = '~';
        int pos = 0;
        while (pos < n) {
            final char ch = s.charAt(pos);
            char cls = ch;
            if (Character.isDigit(ch)) {
                cls = 'd';
            } else if (Character.isLowerCase(ch)) {
                cls = 'x';
            } else if (Character.isUpperCase(ch)) {
                cls = 'X';
            }
            // A Greek letter name overrides the per-character class and is skipped as a unit.
            int width = 1;
            for (String name : greek) {
                if (s.startsWith(name, pos)) {
                    cls = 'g';
                    width = name.length();
                    break;
                }
            }
            if (cls != 'x' && cls != 'X') {
                lettersOnly = false;
            }
            if (cls != previous) {
                shape.append(cls);
            }
            previous = cls;
            pos += width;
        }
        if (n <= 3) {
            shape.append(':').append(n);
        }
        if (knownLCWords != null && lettersOnly && knownLCWords.contains(s.toLowerCase())) {
            shape.append('k');
        }
        return shape.toString();
    }

    /**
     * Entry point of the "chris2" shaper: strings of up to four characters go to the
     * short variant, longer ones to the long variant.
     *
     * @param s the string to shape
     * @param omitIfInBoundary forwarded to the long variant only
     * @param knownLCWords lower-cased known words, or {@code null}
     * @return the shape string
     */
    private static String wordShapeChris2(String s, boolean omitIfInBoundary, Collection<String> knownLCWords) {
        final int length = s.length();
        return length <= 4
                ? WordShapeClassifier.wordShapeChris2Short(s, length, knownLCWords)
                : WordShapeClassifier.wordShapeChris2Long(s, omitIfInBoundary, length, knownLCWords);
    }

    /**
     * Uncollapsed shape for short strings: one class character per input unit.
     * Characters map to {@code 'd'} (digit), {@code 'x'} (lower case), {@code 'X'} (upper or
     * title case) or themselves; a Greek letter name from {@code greek} starting at the
     * current position is consumed whole and mapped to {@code 'g'}. A trailing {@code 'k'}
     * is added when only 'x'/'X' classes were seen and the lower-cased string is in
     * {@code knownLCWords}.
     *
     * @param s the string to shape
     * @param len number of leading characters of {@code s} to process (the caller passes {@code s.length()})
     * @param knownLCWords lower-cased known words, or {@code null} to skip the 'k' marker
     * @return the shape string
     */
    private static String wordShapeChris2Short(String s, int len, Collection<String> knownLCWords) {
        // Reserve one extra slot for the possible 'k' marker.
        final StringBuilder shape = new StringBuilder(knownLCWords == null ? len : len + 1);
        boolean lettersOnly = true;
        int pos = 0;
        while (pos < len) {
            final char ch = s.charAt(pos);
            char cls = ch;
            if (Character.isDigit(ch)) {
                cls = 'd';
            } else if (Character.isLowerCase(ch)) {
                cls = 'x';
            } else if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                cls = 'X';
            }
            int width = 1;
            for (String name : greek) {
                if (s.startsWith(name, pos)) {
                    cls = 'g';
                    width = name.length();
                    break;
                }
            }
            if (cls != 'x' && cls != 'X') {
                lettersOnly = false;
            }
            shape.append(cls);
            pos += width;
        }
        if (knownLCWords != null && lettersOnly && knownLCWords.contains(s.toLowerCase())) {
            shape.append('k');
        }
        return shape.toString();
    }

    /**
     * Shape for longer strings: the classes of the units starting in the first two and last
     * two positions are kept in order, while the classes seen in between are reduced to a
     * sorted set. Characters map to {@code 'd'}, {@code 'x'}, {@code 'X'} (upper or title case)
     * or themselves; a Greek letter name from {@code greek} is consumed whole and mapped to
     * {@code 'g'}, and is filed under the position where it starts.
     *
     * @param s the string to shape
     * @param omitIfInBoundary when {@code true}, a middle class is dropped if it already
     *        occurs among the kept begin or end classes
     * @param len number of leading characters of {@code s} to process (the caller passes {@code s.length()})
     * @param knownLCWords lower-cased known words, or {@code null} to skip the 'k' marker
     * @return begin classes + sorted middle classes + end classes (+ {@code 'k'})
     */
    private static String wordShapeChris2Long(String s, boolean omitIfInBoundary, int len, Collection<String> knownLCWords) {
        final StringBuilder head = new StringBuilder(2);
        final StringBuilder tail = new StringBuilder(2);
        final TreeSet<Character> middle = new TreeSet<Character>();
        boolean lettersOnly = true;
        int pos = 0;
        while (pos < len) {
            final char ch = s.charAt(pos);
            char cls = ch;
            if (Character.isDigit(ch)) {
                cls = 'd';
            } else if (Character.isLowerCase(ch)) {
                cls = 'x';
            } else if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                cls = 'X';
            }
            int width = 1;
            for (String name : greek) {
                if (s.startsWith(name, pos)) {
                    cls = 'g';
                    width = name.length();
                    break;
                }
            }
            if (cls != 'x' && cls != 'X') {
                lettersOnly = false;
            }
            // The bucket is chosen from the START position of the unit.
            if (pos < 2) {
                head.append(cls);
            } else if (pos < len - 2) {
                middle.add(cls);
            } else {
                tail.append(cls);
            }
            pos += width;
        }

        final StringBuilder shape = new StringBuilder(head.length() + middle.size() + tail.length() + 1);
        shape.append(head);
        for (Character boxed : middle) {
            final String cls = String.valueOf(boxed.charValue());
            final boolean inBoundary = head.indexOf(cls) >= 0 || tail.indexOf(cls) >= 0;
            if (omitIfInBoundary && inBoundary) {
                continue;
            }
            shape.append(cls);
        }
        shape.append(tail);
        if (knownLCWords != null && lettersOnly && knownLCWords.contains(s.toLowerCase())) {
            shape.append('k');
        }
        return shape.toString();
    }

    /**
     * Maps a single character to the one-character equivalence class used by the
     * "chris4" shaper. The tests are applied in order and the first match wins:
     * <ul>
     *   <li>{@code 'd'} - digits, letter/other numbers, and the listed CJK numerals and
     *       round zero symbols</li>
     *   <li>{@code 'o'} - U+7B2C; {@code 'D'} - U+5E74, U+6708, U+65E5</li>
     *   <li>{@code 'x'} - lower case; {@code 'X'} - upper or title case</li>
     *   <li>{@code 's'} - whitespace or space characters</li>
     *   <li>{@code 'c'} - any other OTHER_LETTER character</li>
     *   <li>{@code '$'}, {@code '+'}, {@code '|'}, {@code '('}, {@code ')'}, {@code '`'},
     *       {@code '\''}, {@code '%'}, {@code '.'}, {@code '_'}, {@code '-'} - symbol and
     *       punctuation categories</li>
     *   <li>{@code 'q'} - everything else</li>
     * </ul>
     *
     * <p>Fix: the numeral list was tested with {@code indexOf(c) > 0}, which skipped the
     * character at index 0 (U+4E00) and classified it as {@code 'c'}; it is now {@code 'd'}
     * like the rest of the list.
     * NOTE(review): this changes the shape emitted for U+4E00 - confirm that consumers
     * relying on the old output are updated accordingly.
     *
     * @param c the character to classify
     * @return the equivalence-class character
     */
    private static char chris4equivalenceClass(char c) {
        final int type = Character.getType(c);
        if (Character.isDigit(c) || type == Character.LETTER_NUMBER || type == Character.OTHER_NUMBER
                || "\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u96f6\u3007\u767e\u5343\u4e07\u4ebf\u5169\u25cb\u25ef".indexOf(c) >= 0) {
            return 'd';
        }
        if (c == '\u7b2c') {
            return 'o';
        }
        if (c == '\u5e74' || c == '\u6708' || c == '\u65e5') {
            return 'D';
        }
        if (Character.isLowerCase(c)) {
            return 'x';
        }
        if (Character.isUpperCase(c) || Character.isTitleCase(c)) {
            return 'X';
        }
        if (Character.isWhitespace(c) || Character.isSpaceChar(c)) {
            return 's';
        }
        if (type == Character.OTHER_LETTER) {
            return 'c';
        }
        if (type == Character.CURRENCY_SYMBOL) {
            return '$';
        }
        if (type == Character.MATH_SYMBOL) {
            return '+';
        }
        // NOTE(review): ASCII '|' is itself a MATH_SYMBOL, so it is already answered with '+'
        // above and the c == '|' test below never fires. Left as is to keep existing output.
        if (type == Character.OTHER_SYMBOL || c == '|') {
            return '|';
        }
        if (type == Character.START_PUNCTUATION) {
            return '(';
        }
        if (type == Character.END_PUNCTUATION) {
            return ')';
        }
        if (type == Character.INITIAL_QUOTE_PUNCTUATION) {
            return '`';
        }
        if (type == Character.FINAL_QUOTE_PUNCTUATION || c == '\'') {
            return '\'';
        }
        // '%' must be tested before the generic OTHER_PUNCTUATION bucket it belongs to.
        if (c == '%') {
            return '%';
        }
        if (type == Character.OTHER_PUNCTUATION) {
            return '.';
        }
        if (type == Character.CONNECTOR_PUNCTUATION) {
            return '_';
        }
        if (type == Character.DASH_PUNCTUATION) {
            return '-';
        }
        return 'q';
    }

    /**
     * Computes the "chris4" shape of a string with {@code omitIfInBoundary = false} and
     * no known-lower-case-word collection.
     *
     * @param s the string to shape
     * @return the chris4 shape string
     */
    public static String wordShapeChris4(String s) {
        return WordShapeClassifier.wordShapeChris4(s, false, null);
    }

    /**
     * Entry point of the "chris4" shaper: strings of up to four characters go to the
     * short variant, longer ones to the long variant.
     *
     * @param s the string to shape
     * @param omitIfInBoundary forwarded to the long variant only
     * @param knownLCWords lower-cased known words, or {@code null}
     * @return the shape string
     */
    private static String wordShapeChris4(String s, boolean omitIfInBoundary, Collection<String> knownLCWords) {
        final int length = s.length();
        return length <= 4
                ? WordShapeClassifier.wordShapeChris4Short(s, length, knownLCWords)
                : WordShapeClassifier.wordShapeChris4Long(s, omitIfInBoundary, length, knownLCWords);
    }

    /**
     * Uncollapsed chris4 shape for short strings: one class character per input unit.
     * Each character is classified by {@code chris4equivalenceClass}; a Greek letter name
     * from {@code greek} starting at the current position is consumed whole and mapped to
     * {@code 'g'}. A trailing {@code 'k'} is added when only 'x'/'X' classes were seen and
     * the lower-cased string is in {@code knownLCWords}.
     *
     * @param s the string to shape
     * @param len number of leading characters of {@code s} to process (the caller passes {@code s.length()})
     * @param knownLCWords lower-cased known words, or {@code null} to skip the 'k' marker
     * @return the shape string
     */
    private static String wordShapeChris4Short(String s, int len, Collection<String> knownLCWords) {
        // Reserve one extra slot for the possible 'k' marker.
        final StringBuilder shape = new StringBuilder(knownLCWords == null ? len : len + 1);
        boolean lettersOnly = true;
        int pos = 0;
        while (pos < len) {
            char cls = WordShapeClassifier.chris4equivalenceClass(s.charAt(pos));
            int width = 1;
            for (String name : greek) {
                if (s.startsWith(name, pos)) {
                    cls = 'g';
                    width = name.length();
                    break;
                }
            }
            if (cls != 'x' && cls != 'X') {
                lettersOnly = false;
            }
            shape.append(cls);
            pos += width;
        }
        if (knownLCWords != null && lettersOnly && knownLCWords.contains(s.toLowerCase())) {
            shape.append('k');
        }
        return shape.toString();
    }

    /**
     * chris4 shape for longer strings: the classes of the units starting in the first two
     * and last two positions are kept in order, while the classes seen in between are
     * reduced to a sorted set. Each character is classified by
     * {@code chris4equivalenceClass}; a Greek letter name from {@code greek} is consumed
     * whole, mapped to {@code 'g'}, and filed under the position where it starts.
     *
     * @param s the string to shape
     * @param omitIfInBoundary when {@code true}, a middle class is dropped if it already
     *        occurs among the kept begin or end classes
     * @param len number of leading characters of {@code s} to process (the caller passes {@code s.length()})
     * @param knownLCWords lower-cased known words, or {@code null} to skip the 'k' marker
     * @return begin classes + sorted middle classes + end classes (+ {@code 'k'})
     */
    private static String wordShapeChris4Long(String s, boolean omitIfInBoundary, int len, Collection<String> knownLCWords) {
        final StringBuilder shape = new StringBuilder(s.length() + 1);
        final StringBuilder tail = new StringBuilder(2);
        final Set<Character> boundary = Generics.newHashSet(4);
        final TreeSet<Character> interior = new TreeSet<Character>();
        boolean lettersOnly = true;
        int pos = 0;
        while (pos < len) {
            char cls = WordShapeClassifier.chris4equivalenceClass(s.charAt(pos));
            int width = 1;
            for (String name : greek) {
                if (s.startsWith(name, pos)) {
                    cls = 'g';
                    width = name.length();
                    break;
                }
            }
            if (cls != 'x' && cls != 'X') {
                lettersOnly = false;
            }
            // The bucket is chosen from the START position of the unit.
            final boolean inHead = pos < 2;
            final boolean inTail = pos >= len - 2;
            if (inHead) {
                shape.append(cls);
                boundary.add(cls);
            } else if (inTail) {
                boundary.add(cls);
                tail.append(cls);
            } else {
                interior.add(cls);
            }
            pos += width;
        }
        for (Character cls : interior) {
            final boolean suppressed = omitIfInBoundary && boundary.contains(cls);
            if (!suppressed) {
                shape.append(cls.charValue());
            }
        }
        shape.append((CharSequence) tail);
        if (knownLCWords != null && lettersOnly && knownLCWords.contains(s.toLowerCase())) {
            shape.append('k');
        }
        return shape.toString();
    }

    /**
     * The "dan2" shape with a {@code "-GREEK"} suffix when the string contains a Greek
     * letter name (as decided by {@code containsGreekLetter}).
     *
     * @param s the string to shape
     * @param knownLCWords lower-cased known words, or {@code null}
     * @return the dan2 shape, optionally followed by {@code "-GREEK"}
     */
    private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
        final boolean hasGreek = WordShapeClassifier.containsGreekLetter(s);
        final String base = WordShapeClassifier.wordShapeDan2(s, knownLCWords);
        return hasGreek ? base + "-GREEK" : base;
    }

    /**
     * Tells whether the {@code biogreek} pattern matches anywhere inside the string.
     *
     * @param s the string to search
     * @return {@code true} if {@code biogreek} finds a match in {@code s}
     */
    private static boolean containsGreekLetter(String s) {
        return biogreek.matcher(s).find();
    }

    /**
     * Assigns one label from a fixed inventory (CARDINAL13, NUMBER, ACRONYM, CAPITALIZED,
     * ALLCAPS-DIGIT, LOWERCASE-DASH, MIXEDCASE, SYMBOL, ...) based on which digits, letters
     * cases, dashes and periods occur in the string.
     *
     * <p>Two passes are made. The first recognises purely numeric tokens: digits only gives
     * a CARDINAL label by length; digits plus {@code '.'}, {@code ','} or a leading sign gives
     * NUMBER. The second collects case/dash/period flags and the label is chosen by the
     * first matching rule below.
     *
     * @param s the string to shape
     * @return the label; the empty string yields {@code "SYMBOL"}
     */
    private static String wordShapeChris1(String s) {
        final int n = s.length();
        if (n == 0) {
            return "SYMBOL";
        }

        // ---- Pass 1: numeric tokens ------------------------------------------------
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean numericCharsOnly = true;
        for (int pos = 0; pos < n; pos++) {
            final char ch = s.charAt(pos);
            if (Character.isDigit(ch)) {
                hasDigit = true;
                continue;
            }
            hasNonDigit = true;
            final boolean leadingSign = pos == 0 && (ch == '-' || ch == '+');
            if (ch != '.' && ch != ',' && !leadingSign) {
                numericCharsOnly = false;
            }
        }
        if (hasDigit && !hasNonDigit) {
            if (n < 4) {
                return "CARDINAL13";
            }
            return n == 4 ? "CARDINAL4" : "CARDINAL5PLUS";
        }
        if (hasDigit && numericCharsOnly) {
            return "NUMBER";
        }

        // ---- Pass 2: case, dash and period flags -----------------------------------
        boolean hasUpper = false;
        boolean hasLower = false;
        boolean noLowerLetters = true;   // "all caps" as far as letters go
        boolean noUpperLetters = true;   // "all lower" as far as letters go
        boolean startsUpper = false;
        boolean hasDash = false;
        boolean hasPeriod = false;
        for (int pos = 0; pos < n; pos++) {
            final char ch = s.charAt(pos);
            if (ch == '-') {
                hasDash = true;
            } else if (ch == '.') {
                hasPeriod = true;
            }
            final boolean upper = Character.isUpperCase(ch);
            final boolean title = Character.isTitleCase(ch);
            if (title) {
                // A title-case character counts as both an upper and a lower case letter.
                hasUpper = true;
                hasLower = true;
                noLowerLetters = false;
                noUpperLetters = false;
            } else if (upper) {
                hasUpper = true;
                noUpperLetters = false;
            } else if (Character.isLetter(ch)) {
                hasLower = true;
                noLowerLetters = false;
            }
            if (pos == 0 && (upper || title)) {
                startsUpper = true;
            }
        }

        // ---- Label selection: first matching rule wins ------------------------------
        if (n == 2 && startsUpper && hasPeriod) {
            return "ACRONYM1";
        }
        final boolean allCaps = hasUpper && noLowerLetters;
        if (allCaps && !hasDigit && hasPeriod) {
            return "ACRONYM";
        }
        if (hasDigit && hasDash && !hasUpper && !hasLower) {
            return "DIGIT-DASH";
        }
        if (startsUpper && hasLower) {
            if (hasDigit) {
                return hasDash ? "CAPITALIZED-DIGIT-DASH" : "CAPITALIZED-DIGIT";
            }
            return hasDash ? "CAPITALIZED-DASH" : "CAPITALIZED";
        }
        if (allCaps) {
            if (hasDigit) {
                return hasDash ? "ALLCAPS-DIGIT-DASH" : "ALLCAPS-DIGIT";
            }
            // With or without a dash the label is plain ALLCAPS.
            return "ALLCAPS";
        }
        if (hasLower && noUpperLetters) {
            if (hasDigit) {
                return hasDash ? "LOWERCASE-DIGIT-DASH" : "LOWERCASE-DIGIT";
            }
            return hasDash ? "LOWERCASE-DASH" : "LOWERCASE";
        }
        if (hasLower) {
            return hasDigit ? "MIXEDCASE-DIGIT" : "MIXEDCASE";
        }
        return hasDigit ? "SYMBOL-DIGIT" : "SYMBOL";
    }

    /**
     * Replaces every digit character (per {@link Character#isDigit(char)}) with {@code '9'}
     * and leaves all other characters alone.
     *
     * @param s the string to shape
     * @return {@code s} itself when it contains no digit, otherwise a new string with the
     *         digits replaced
     */
    private static String wordShapeDigits(String s) {
        final int n = s.length();
        // Locate the first digit so that digit-free input is returned without copying.
        int firstDigit = 0;
        while (firstDigit < n && !Character.isDigit(s.charAt(firstDigit))) {
            firstDigit++;
        }
        if (firstDigit == n) {
            return s;
        }
        final char[] masked = s.toCharArray();
        for (int pos = firstDigit; pos < n; pos++) {
            if (Character.isDigit(masked[pos])) {
                masked[pos] = '9';
            }
        }
        return new String(masked);
    }

    /**
     * Returns {@code "NUMBER"} for strings made only of digits, {@code '.'}, {@code ','} and
     * an optional leading {@code '-'} or {@code '+'} (this includes the empty string);
     * otherwise looks the string up in {@code DistributionalClusters.cluster1}.
     *
     * @param s the string to shape
     * @return {@code "NUMBER"}, the cluster value stored for {@code s}, or {@code "NULL"}
     *         when the lookup returns {@code null}
     */
    private static String wordShapeCluster1(String s) {
        boolean numeric = true;
        final int n = s.length();
        for (int pos = 0; numeric && pos < n; pos++) {
            final char ch = s.charAt(pos);
            final boolean leadingSign = pos == 0 && (ch == '-' || ch == '+');
            numeric = Character.isDigit(ch) || ch == '.' || ch == ',' || leadingSign;
        }
        if (numeric) {
            return "NUMBER";
        }
        final String cluster = DistributionalClusters.cluster1.get(s);
        return cluster == null ? "NULL" : cluster;
    }

    /**
     * Delegates to {@code ChineseUtils.shapeOf(s, true, true)}.
     * NOTE(review): the meaning of the two boolean flags is defined by ChineseUtils and is
     * not visible in this file - check there before changing them.
     *
     * @param s the string to shape
     * @return whatever {@code ChineseUtils.shapeOf} returns for {@code s}
     */
    private static String wordShapeChinese(String s) {
        return ChineseUtils.shapeOf(s, true, true);
    }

    /**
     * Command-line driver: prints {@code "<arg>: <shape>"} for every string argument.
     *
     * <p>Usage: {@code edu.stanford.nlp.process.WordShapeClassifier [-wordShape name] string+}.
     * Without {@code -wordShape} shaper id 1 is used. With no arguments the usage line is
     * printed. Any other first argument starting with {@code '-'} is reported on stderr
     * and skipped.
     *
     * <p>Fix: the flag test used {@code args[0].charAt(0)}, which threw
     * {@code StringIndexOutOfBoundsException} when the first argument was the empty string;
     * {@code startsWith("-")} treats it as an ordinary string to shape.
     *
     * @param args optional {@code -wordShape name} followed by the strings to shape
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("edu.stanford.nlp.process.WordShapeClassifier [-wordShape name] string+");
            return;
        }
        int i = 0;
        int classifierToUse = 1;
        if (args[0].startsWith("-")) {
            if (args[0].equals("-wordShape") && args.length >= 2) {
                classifierToUse = WordShapeClassifier.lookupShaper(args[1]);
                i += 2;
            } else {
                System.err.println("Unknown flag: " + args[0]);
                ++i;
            }
        }
        while (i < args.length) {
            System.out.print(args[i] + ": ");
            System.out.println(WordShapeClassifier.wordShape(args[i], classifierToUse));
            ++i;
        }
    }

    /**
     * Holder for the word-to-cluster lexicon used by {@code wordShapeCluster1}.
     * The lexicon is read from a fixed file path when this nested class is initialised.
     */
    private static class DistributionalClusters {
        /** Lower-cased word to cluster id, loaded once; never reassigned. */
        public static final Map<String, String> cluster1 = DistributionalClusters.loadWordClusters("/u/nlp/data/pos_tags_are_useless/egw.bnc.200", "alexClark");

        private DistributionalClusters() {
        }

        /**
         * Reads a word-cluster file into a map keyed by lower-cased word.
         *
         * <p>Two line layouts are understood:
         * <ul>
         *   <li>{@code "terryKoo"}: tab-separated, cluster in field 0, word in field 1;</li>
         *   <li>anything else: whitespace-separated, word in field 0, cluster in field 1.</li>
         * </ul>
         * Blank lines are skipped; previously they aborted loading with an
         * {@code ArrayIndexOutOfBoundsException}. A non-blank line with fewer than two
         * fields still fails that way.
         *
         * @param file path handed to {@code ObjectBank.getLineIterator}
         * @param format {@code "terryKoo"} or any other value for the default layout
         * @return a map whose {@code get} lower-cases the key before looking it up
         */
        public static Map<String, String> loadWordClusters(String file, String format) {
            Timing.startDoing("Loading distsim lexicon from " + file);
            final boolean terryKoo = "terryKoo".equals(format);
            final String separator = terryKoo ? "\\t" : "\\s+";
            final int wordField = terryKoo ? 1 : 0;
            final int classField = terryKoo ? 0 : 1;
            LcMap<String, String> lexicon = new LcMap<String, String>();
            for (String line : ObjectBank.getLineIterator(file)) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] bits = line.split(separator);
                lexicon.put(bits[wordField].toLowerCase(), bits[classField]);
            }
            Timing.endDoing();
            return lexicon;
        }

        /** HashMap whose {@code get} lower-cases the key's string form before the lookup. */
        private static class LcMap<K, V>
        extends HashMap<K, V> {
            private static final long serialVersionUID = -457913281600751901L;

            private LcMap() {
            }

            /**
             * Looks up {@code key.toString().toLowerCase()}; a {@code null} key is passed
             * through to {@code HashMap.get} instead of throwing.
             */
            @Override
            public V get(Object key) {
                if (key == null) {
                    return super.get(null);
                }
                return super.get(key.toString().toLowerCase());
            }
        }
    }
}

