/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process.stattok;

import edu.stanford.nlp.classify.ColumnDataClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.RuntimeClassNotFoundException;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Objects;

public class StatTokSent {
    ColumnDataClassifier cdc;
    CoreLabelTokenFactory factory = new CoreLabelTokenFactory();
    Map<String, String[]> multiWordRules = new HashMap<String, String[]>();
    int windowSize = 0;
    private static final Redwood.RedwoodChannels logger = Redwood.channels(StatTokSent.class);
    public static final String SENTINEL = "\u00a7";

    public StatTokSent(String modelFile, String multiWordRulesFile) {
        logger.info("Loading StatTokSent model from " + modelFile);
        if (multiWordRulesFile == null) {
            logger.info("Using default multi word rules");
        } else {
            logger.info("Using multi word rules from " + multiWordRulesFile);
            try {
                this.multiWordRules = this.readMultiWordRules(multiWordRulesFile);
            }
            catch (IOException e) {
                throw new RuntimeIOException(e);
            }
        }
        try {
            ObjectInputStream ois = IOUtils.readStreamFromString(modelFile);
            this.cdc = ColumnDataClassifier.getClassifier(ois);
            this.windowSize = ois.readInt();
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
        catch (ClassNotFoundException e) {
            throw new RuntimeClassNotFoundException(e);
        }
        logger.info("Found window size of " + this.windowSize);
    }

    public StatTokSent(String modelFile) {
        this(modelFile, null);
    }

    private Map<String, String[]> readMultiWordRules(String multiWordRulesFile) throws IOException {
        String line;
        HashMap<String, String[]> multiWordRules = new HashMap<String, String[]>();
        InputStream is = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(multiWordRulesFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        while ((line = reader.readLine()) != null) {
            String[] parts = line.split("\t");
            String token = parts[0];
            String[] tokenComponents = parts[1].split(",");
            multiWordRules.put(token, tokenComponents);
        }
        return multiWordRules;
    }

    private ArrayList<Pair<String, String>> classify(List<String> featurizedText) {
        ArrayList<Pair<String, String>> classificationResults = new ArrayList<Pair<String, String>>();
        for (String line : featurizedText) {
            String character = line.split("\t")[1];
            Datum<String, String> d = this.cdc.makeDatumFromLine(line);
            String result = this.cdc.classOf(d);
            Pair<String, String> charAndResult = new Pair<String, String>(character, result);
            classificationResults.add(charAndResult);
        }
        return classificationResults;
    }

    private List<String> textToFeatures(String text, int windowSize) {
        ArrayList<String> featurizedText = new ArrayList<String>();
        String[] splitted = text.split("");
        List<String> splittedText = Arrays.asList(splitted);
        String[] window = new String[windowSize * 2 + 1];
        String toWrite = "";
        for (int i = 0; i < splittedText.size(); ++i) {
            String currentCharacter = splittedText.get(i);
            Boolean isUpperCase = Character.isUpperCase(currentCharacter.charAt(0));
            toWrite = "";
            for (int j = -windowSize; j <= windowSize; ++j) {
                try {
                    window[j + windowSize] = splittedText.get(i + j);
                    continue;
                }
                catch (ArrayIndexOutOfBoundsException e) {
                    window[j + windowSize] = SENTINEL;
                }
            }
            int index = 0;
            for (String character : window) {
                if (index == windowSize) {
                    ++index;
                    continue;
                }
                toWrite = toWrite + character + "\t";
                ++index;
            }
            toWrite = "?\t" + currentCharacter + "\t" + toWrite + Integer.toString(isUpperCase != false ? 1 : 0);
            featurizedText.add(toWrite);
        }
        return featurizedText;
    }

    private Boolean tokenToSplit(CoreLabel token, Map<String, String[]> multiWordRules) {
        if (multiWordRules.get(token.word()) != null) {
            return true;
        }
        return false;
    }

    private ArrayList<Pair<CoreLabel, String>> splitToken(Pair<CoreLabel, String> tokenAndClass, Map<String, String[]> multiWordRules) {
        ArrayList<Pair<CoreLabel, String>> splittedTokenAndClass = new ArrayList<Pair<CoreLabel, String>>();
        CoreLabel token = tokenAndClass.first();
        String originalClass = tokenAndClass.second();
        int tokenBeginPosition = token.beginPosition();
        int tokenEndPosition = token.endPosition();
        String[] splitted = multiWordRules.get(token.word());
        ArrayList<String> splittedList = new ArrayList<String>();
        for (String word : splitted) {
            splittedList.add(word);
        }
        ListIterator backwardsPartsIterator = null;
        backwardsPartsIterator = splittedList.listIterator(splittedList.size());
        while (backwardsPartsIterator.hasPrevious()) {
            int partLength;
            Pair<Object, Object> partTokenAndClass = new Pair();
            CoreLabel partToken = new CoreLabel();
            String part = (String)backwardsPartsIterator.previous();
            if (backwardsPartsIterator.hasPrevious()) {
                partLength = part.length();
                partToken = this.factory.makeToken(part, token.originalText(), tokenEndPosition - partLength, partLength + 1);
                tokenEndPosition -= partLength;
                partTokenAndClass = new Pair<CoreLabel, String>(partToken, "C");
            } else {
                partLength = part.length();
                partToken = this.factory.makeToken(part, token.originalText(), tokenBeginPosition, partLength + 1);
                partTokenAndClass = new Pair<CoreLabel, String>(partToken, originalClass);
            }
            splittedTokenAndClass.add(0, partTokenAndClass);
        }
        return splittedTokenAndClass;
    }

    private List<CoreLabel> makeSentenceTokens(ArrayList<Pair<CoreLabel, String>> tokensAndClasses) {
        ArrayList<Pair<CoreLabel, String>> multiTokensAndClasses = new ArrayList<Pair<CoreLabel, String>>();
        int index = 1;
        for (Pair<CoreLabel, String> tokenAndClass : tokensAndClasses) {
            CoreLabel token = tokenAndClass.first();
            if (this.tokenToSplit(token, this.multiWordRules).booleanValue()) {
                ArrayList<Pair<CoreLabel, String>> multiTokenAndClass = this.splitToken(tokenAndClass, this.multiWordRules);
                for (Pair<CoreLabel, String> partTokenAndClass : multiTokenAndClass) {
                    partTokenAndClass.first().setIndex(index);
                    ++index;
                    multiTokensAndClasses.add(partTokenAndClass);
                }
                continue;
            }
            tokenAndClass.first().setIndex(index);
            ++index;
            multiTokensAndClasses.add(tokenAndClass);
        }
        ArrayList<CoreLabel> sentenceTokens = new ArrayList<CoreLabel>();
        ListIterator backwardsMultiTokensAndClasses = multiTokensAndClasses.listIterator(multiTokensAndClasses.size());
        int spanLenght = 0;
        String origText = "";
        boolean changeOrigText = false;
        while (backwardsMultiTokensAndClasses.hasPrevious()) {
            Pair tokenAndClass = (Pair)backwardsMultiTokensAndClasses.previous();
            CoreLabel coreLabel = (CoreLabel)tokenAndClass.first();
            String beginClass = (String)tokenAndClass.second();
            if (beginClass.equals("C")) {
                ++spanLenght;
                if (coreLabel.word() != coreLabel.originalText()) continue;
                origText = coreLabel.word() + origText;
                changeOrigText = true;
                continue;
            }
            if (spanLenght <= 0) continue;
            int spanBegin = coreLabel.index();
            int spanEnd = coreLabel.index() + spanLenght;
            IntPair tokenSpan = new IntPair(spanBegin, spanEnd);
            origText = coreLabel.word() + origText;
            for (int i = 0; i <= spanLenght; ++i) {
                if (changeOrigText) {
                    ((CoreLabel)((Pair)multiTokensAndClasses.get(coreLabel.index() - 1 + i)).first()).set(CoreAnnotations.OriginalTextAnnotation.class, origText);
                }
                ((CoreLabel)((Pair)multiTokensAndClasses.get(coreLabel.index() - 1 + i)).first()).set(CoreAnnotations.CoNLLUTokenSpanAnnotation.class, tokenSpan);
            }
            origText = "";
            changeOrigText = false;
            spanLenght = 0;
        }
        for (Pair pair : multiTokensAndClasses) {
            CoreLabel token = (CoreLabel)pair.first();
            String beginClass = (String)pair.second();
            sentenceTokens.add(token);
        }
        return sentenceTokens;
    }

    public List<List<CoreLabel>> tokenize(String text) {
        List<String> featurizedText = this.textToFeatures(text, this.windowSize);
        ArrayList<Pair<String, String>> classificationResults = this.classify(featurizedText);
        List<Object> sentenceTokens = new ArrayList();
        ArrayList<List<CoreLabel>> ret = new ArrayList<List<CoreLabel>>();
        int i = 0;
        int beginToken = 0;
        int endToken = 0;
        String currentWord = "";
        String lastBeginChar = "";
        int tokensCounter = 0;
        ArrayList<Pair<CoreLabel, String>> sentenceTokensBase = new ArrayList<Pair<CoreLabel, String>>();
        while (i < classificationResults.size()) {
            Pair<CoreLabel, String> tokenAndClass;
            CoreLabel newToken;
            String currentChar = classificationResults.get(i).first();
            String currentClass = classificationResults.get(i).second();
            if (currentChar.equals(SENTINEL) && currentClass.equals("I")) {
                currentClass = "O";
            }
            if (currentClass.equals("S")) {
                lastBeginChar = currentClass;
                if (i == 0) {
                    currentWord = currentWord + currentChar;
                    ++i;
                    continue;
                }
                if (currentWord != "") {
                    endToken = i - 1;
                    newToken = this.factory.makeToken(currentWord, currentWord, beginToken, endToken - beginToken + 1);
                    tokenAndClass = new Pair<CoreLabel, String>(newToken, lastBeginChar);
                    sentenceTokensBase.add(tokenAndClass);
                    ++tokensCounter;
                }
                sentenceTokens = this.makeSentenceTokens(sentenceTokensBase);
                ret.add(sentenceTokens);
                sentenceTokensBase = new ArrayList();
                currentWord = "";
                beginToken = i;
                if (!currentChar.equals(SENTINEL)) {
                    currentWord = currentWord + currentChar;
                }
            }
            if (currentClass.equals("T") || currentClass.equals("C")) {
                if (currentWord != "") {
                    endToken = i - 1;
                    newToken = this.factory.makeToken(currentWord, currentWord, beginToken, endToken - beginToken + 1);
                    tokenAndClass = new Pair<CoreLabel, String>(newToken, lastBeginChar);
                    sentenceTokensBase.add(tokenAndClass);
                    ++tokensCounter;
                }
                beginToken = i;
                endToken = i;
                currentWord = "";
                if (!currentChar.equals(SENTINEL)) {
                    currentWord = currentWord + currentChar;
                }
                lastBeginChar = currentClass;
            }
            if (currentClass.equals("I")) {
                currentWord = currentWord + currentChar;
            }
            if (currentClass.equals("O")) {
                endToken = i - 1;
                newToken = this.factory.makeToken(currentWord, currentWord, beginToken, endToken - beginToken + 1);
                tokenAndClass = new Pair<CoreLabel, String>(newToken, lastBeginChar);
                sentenceTokensBase.add(tokenAndClass);
                ++tokensCounter;
                currentWord = "";
            }
            if (i == classificationResults.size() - 1) {
                endToken = i - 1;
                newToken = this.factory.makeToken(currentWord, currentWord, beginToken, endToken - beginToken + 1);
                tokenAndClass = new Pair<CoreLabel, String>(newToken, lastBeginChar);
                sentenceTokensBase.add(tokenAndClass);
                ++tokensCounter;
                sentenceTokens = this.makeSentenceTokens(sentenceTokensBase);
                ret.add(sentenceTokens);
            }
            ++i;
        }
        return ret;
    }

    public static void main(String[] args) throws Exception {
        String modelFile;
        Map<String, String[]> arguments = StringUtils.argsToMap(args);
        String textFile = null;
        int windowSize = 0;
        String multiWordRulesFile = null;
        try {
            textFile = arguments.get("-textFile")[0];
        }
        catch (NullPointerException ex) {
            System.out.println("You have not specified a text file.\nUse -textFile option.");
            ex.printStackTrace();
        }
        try {
            modelFile = arguments.get("-model")[0];
        }
        catch (NullPointerException ex) {
            System.out.println("You have not specified a model.\nUse -model option.");
            throw ex;
        }
        try {
            windowSize = Integer.parseInt(arguments.get("-windowSize")[0]);
        }
        catch (NullPointerException ex) {
            System.out.println("You have not specified a window size.\nUse -windowSize option.");
            ex.printStackTrace();
        }
        String text = "";
        try {
            String line;
            BufferedReader reader = new BufferedReader(new FileReader(textFile));
            while ((line = reader.readLine()) != null) {
                text = text + line;
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        try {
            multiWordRulesFile = arguments.get("-multiWordRules")[0];
        }
        catch (NullPointerException ex) {
            System.out.println("No multiWordRules file specified.");
        }
        StatTokSent tokenizer = null;
        tokenizer = multiWordRulesFile != null ? new StatTokSent(modelFile, multiWordRulesFile) : new StatTokSent(modelFile);
        List<List<CoreLabel>> sentences = tokenizer.tokenize(text);
        for (List<CoreLabel> sentence : sentences) {
            for (CoreLabel token : sentence) {
                System.out.println(token);
            }
            System.out.println("");
        }
    }
}

