package org.cogroo.analyzer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.Span;
import org.apache.log4j.Logger;
import org.cogroo.config.Analyzers;
import org.cogroo.text.Document;
import org.cogroo.text.Sentence;
import org.cogroo.text.Token;
import org.cogroo.tools.postag.GenderUtil;
import org.cogroo.util.EntityUtils;
import org.cogroo.util.TextUtils;

/* loaded from: input_file:org/cogroo/analyzer/POSTagger.class */
/**
 * {@link Analyzer} that assigns part-of-speech tags to the tokens of every
 * sentence in a document using an OpenNLP {@link POSTaggerME}.
 *
 * <p>For each sentence the analyzer:
 * <ol>
 *   <li>tags the token lexemes (with additional context derived from the
 *       contraction finder and name finder analyzers);</li>
 *   <li>stores the averaged log-probability on the sentence and the tag and
 *       per-token probability on each token;</li>
 *   <li>builds spans from tags carrying a {@code B-}/{@code I-} prefix and hands
 *       them to {@code EntityUtils.groupTokens};</li>
 *   <li>merges tokens that are joined by a hyphen with no characters in
 *       between.</li>
 * </ol>
 *
 * <p>Calls to the shared tagger are synchronized on the tagger instance so that
 * the tags and the probabilities read afterwards belong to the same call.
 */
public class POSTagger implements Analyzer {
    private static final Logger LOGGER = Logger.getLogger(POSTagger.class);

    /** Underlying tagger; also used as the monitor guarding {@code tag}/{@code probs}. */
    private final POSTaggerME tagger;

    /**
     * Creates an analyzer backed by the given OpenNLP tagger.
     *
     * @param pOSTaggerME the tagger used to tag every sentence
     */
    public POSTagger(POSTaggerME pOSTaggerME) {
        this.tagger = pOSTaggerME;
    }

    /**
     * Tags every sentence of the document in place: sets the sentence token
     * probability, each token's POS tag and tag probability, groups tokens by
     * tag spans and finally merges hyphenated words.
     *
     * @param document the document whose sentences are tagged
     */
    @Override // org.cogroo.analyzer.Analyzer
    public void analyze(Document document) {
        for (Sentence sentence : document.getSentences()) {
            List<Token> tokens = sentence.getTokens();
            String[][] additionalContext = TextUtils.additionalContext(tokens, Arrays.asList(Analyzers.CONTRACTION_FINDER, Analyzers.NAME_FINDER));
            String[] lexemes = TextUtils.tokensToString(sentence.getTokens());

            String[] tags;
            double[] probs;
            // probs() is read right after tag(); hold the lock across both calls
            // so another thread cannot tag in between.
            synchronized (this.tagger) {
                tags = this.tagger.tag(lexemes, additionalContext);
                probs = this.tagger.probs();
            }

            double finalProb = computeFinalProb(probs);
            sentence.setTokensProb(finalProb);

            if (LOGGER.isDebugEnabled()) {
                StringBuilder message = new StringBuilder("Probabilidades do tagger:\n");
                for (int i = 0; i < lexemes.length; i++) {
                    message.append("[").append(lexemes[i]).append("_").append(tags[i]).append(" ").append(probs[i]).append("] ");
                }
                LOGGER.debug(message.toString());
                LOGGER.debug("Soma dos logs das probabilidades: " + finalProb);
            }

            String[] cleanedTags = GenderUtil.removeGender(tags);
            for (int i = 0; i < cleanedTags.length; i++) {
                Token token = tokens.get(i);
                token.setPOSTag(cleanedTags[i]);
                token.setPOSTagProb(probs[i]);
            }

            List<Span> spans = createSpanList(toTokensArray(tokens), toTagsArray(tokens));
            EntityUtils.groupTokens(sentence.getText(), tokens, spans);
            mergeHyphenedWords(sentence);
        }
    }

    /**
     * Returns the arithmetic mean of the natural logarithms of the given
     * probabilities, or {@code 0.0} for an empty array.
     */
    private double computeFinalProb(double[] probs) {
        double logSum = 0.0d;
        for (double prob : probs) {
            logSum += Math.log(prob);
        }
        if (probs.length > 0) {
            logSum /= probs.length;
        }
        return logSum;
    }

    /** Returns the lexemes of the given tokens, in order. */
    private String[] toTokensArray(List<Token> tokens) {
        String[] lexemes = new String[tokens.size()];
        for (int i = 0; i < tokens.size(); i++) {
            lexemes[i] = tokens.get(i).getLexeme();
        }
        return lexemes;
    }

    /** Returns the POS tags of the given tokens, in order. */
    private String[] toTagsArray(List<Token> tokens) {
        String[] tags = new String[tokens.size()];
        for (int i = 0; i < tokens.size(); i++) {
            tags[i] = tokens.get(i).getPOSTag();
        }
        return tags;
    }

    /**
     * Converts a sequence of tags into spans. Tags starting with {@code B-}
     * open a span, tags equal to {@code I-} plus the current span type extend
     * it, and every tag without a {@code B-}/{@code I-} prefix is treated as
     * outside ({@code O}) and closes the open span. An {@code I-} tag whose type
     * differs from the current span type also opens a new span.
     *
     * @param strArr  the tokens; only its length is used, as the initial
     *                capacity of the result list
     * @param strArr2 the tag of each token
     * @return the spans, with token-index boundaries (end exclusive) and the
     *         tag text after the two-character prefix as span type
     */
    public static List<Span> createSpanList(String[] strArr, String[] strArr2) {
        List<Span> spans = new ArrayList<Span>(strArr.length);
        String currentType = "";
        int spanStart = 0;
        boolean inSpan = false;

        for (int idx = 0; idx < strArr2.length; idx++) {
            String tag = strArr2[idx];
            if (!tag.startsWith("B-") && !tag.startsWith("I-")) {
                tag = "O";
            }
            boolean continuesSpan = tag.equals("I-" + currentType);

            if (tag.startsWith("B-") || !(continuesSpan || tag.equals("O"))) {
                // A new span begins here; flush the one that was open, if any.
                if (inSpan) {
                    spans.add(new Span(spanStart, idx, currentType));
                }
                spanStart = idx;
                currentType = tag.substring(2);
                inSpan = true;
            } else if (!continuesSpan && inSpan) {
                // Outside tag: close the open span.
                spans.add(new Span(spanStart, idx, currentType));
                inSpan = false;
                currentType = "";
            }
        }

        if (inSpan) {
            spans.add(new Span(spanStart, strArr2.length, currentType));
        }
        return spans;
    }

    /**
     * Merges tokens joined by a {@code "-"} token that touches both of its
     * neighbours (no characters in between), mutating the sentence's token list.
     *
     * <p>If the token after the hyphen has a tag starting with {@code pron-},
     * the hyphen is attached to that token. Otherwise, when {@link #merge}
     * yields a tag, the left token, the hyphen and the right token are collapsed
     * into the right token.
     */
    private void mergeHyphenedWords(Sentence sentence) {
        List<Token> tokens = sentence.getTokens();
        int idx = 1;
        // The list shrinks while scanning, so the bound is re-evaluated each step.
        while (idx < tokens.size() - 1) {
            Token hyphen = tokens.get(idx);
            boolean gluedHyphen = "-".equals(hyphen.getLexeme())
                    && !hasCharacterBetween(tokens.get(idx - 1), hyphen)
                    && !hasCharacterBetween(hyphen, tokens.get(idx + 1));
            if (!gluedHyphen) {
                idx++;
                continue;
            }

            Token left = tokens.get(idx - 1);
            Token right = tokens.get(idx + 1);

            if (right.getPOSTag().startsWith("pron-")) {
                // Attach the hyphen to the pronoun and drop the hyphen token.
                right.setBoundaries(right.getStart() - 1, right.getEnd());
                right.setLexeme("-" + right.getLexeme());
                tokens.remove(idx);
                // The pronoun now sits at idx; resume with the token after it.
                idx++;
                continue;
            }

            String mergedTag = merge(left.getPOSTag(), right.getPOSTag());
            if (mergedTag == null) {
                idx++;
                continue;
            }

            right.setLexeme(left.getLexeme() + "-" + right.getLexeme());
            right.setPOSTag(mergedTag);
            right.setBoundaries(left.getStart(), right.getEnd());
            tokens.remove(idx);
            tokens.remove(idx - 1);
            // The merged token now sits at idx - 1; idx is left unchanged so a
            // following hyphen can be merged with it as well.
        }
    }

    /**
     * Decides the tag of a hyphenated compound from the tags of its two parts.
     *
     * @param left  tag of the part before the hyphen
     * @param right tag of the part after the hyphen
     * @return the tag for the merged token, or {@code null} when the two parts
     *         must not be merged
     */
    private String merge(String left, String right) {
        // Any noun on either side yields a noun; this also covers
        // noun+adjective and verb+noun compounds.
        if (isNoun(left) || isNoun(right)) {
            return "n";
        }
        if (isAdjective(left) && isAdjective(right)) {
            return "n";
        }
        if ("prep".equals(right) || "art".equals(right)) {
            return left;
        }
        if (isVerb(left) && "adv".equals(right)) {
            return "n";
        }
        if (left.equals(right)) {
            return left;
        }
        return null;
    }

    /** Returns whether the tag starts with {@code v-}. */
    private boolean isVerb(String tag) {
        return tag.startsWith("v-");
    }

    /** Returns whether the tag is {@code n} or {@code n-adj}. */
    private boolean isNoun(String tag) {
        return "n".equals(tag) || "n-adj".equals(tag);
    }

    /** Returns whether the tag is {@code adj} or {@code n-adj}. */
    private boolean isAdjective(String tag) {
        return "adj".equals(tag) || "n-adj".equals(tag);
    }

    /** Returns whether the first token's end differs from the second token's start. */
    private boolean hasCharacterBetween(Token first, Token second) {
        return first.getEnd() != second.getStart();
    }
}
