package org.cogroo.gc.cmdline.dictionary;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashSet;
import opennlp.tools.cmdline.BasicCmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.postag.MyPOSDictionary;
import org.cogroo.formats.ad.ADFeaturizerSampleStream;
import org.cogroo.interpreters.FlorestaTagInterpreter;
import org.cogroo.interpreters.JspellTagInterpreter;
import org.cogroo.tools.featurizer.FeatureSample;

/* loaded from: input_file:org/cogroo/gc/cmdline/dictionary/POSDictionaryBuilderTool.class */
public class POSDictionaryBuilderTool extends BasicCmdLineTool {

    /* loaded from: input_file:org/cogroo/gc/cmdline/dictionary/POSDictionaryBuilderTool$Params.class */
    interface Params extends POSDictionaryBuilderParams {
    }

    public String getShortDescription() {
        return "builds a new POS Tag dictionary";
    }

    public String getHelp() {
        return getBasicHelp(Params.class);
    }

    public void run(String[] strArr) {
        Params params = (Params) validateAndParseParams(strArr, Params.class);
        File inputFile = params.getInputFile();
        File outputFile = params.getOutputFile();
        File corpus = params.getCorpus();
        Charset encoding = params.getEncoding();
        CmdLineUtil.checkInputFile("dictionary input file", inputFile);
        CmdLineUtil.checkOutputFile("dictionary output file", outputFile);
        CmdLineUtil.checkInputFile("corpus input file", corpus);
        InputStreamReader inputStreamReader = null;
        FileOutputStream fileOutputStream = null;
        try {
            try {
                ADFeaturizerSampleStream aDFeaturizerSampleStream = new ADFeaturizerSampleStream(new FileInputStream(corpus), "ISO-8859-1", false);
                HashSet hashSet = new HashSet();
                HashSet hashSet2 = new HashSet();
                for (FeatureSample read = aDFeaturizerSampleStream.read(); read != null; read = aDFeaturizerSampleStream.read()) {
                    Collections.addAll(hashSet, read.getFeatures());
                    Collections.addAll(hashSet2, read.getTags());
                }
                inputStreamReader = new InputStreamReader(new FileInputStream(inputFile), encoding);
                fileOutputStream = new FileOutputStream(outputFile);
                MyPOSDictionary.parseOneEntryPerLine(inputStreamReader, new JspellTagInterpreter(), new FlorestaTagInterpreter(), hashSet, hashSet2, params.getAllowInvalidFeats().booleanValue()).serialize(fileOutputStream);
                try {
                    inputStreamReader.close();
                    fileOutputStream.close();
                } catch (IOException e) {
                }
            } catch (IOException e2) {
                throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e2.getMessage());
            }
        } catch (Throwable th) {
            try {
                inputStreamReader.close();
                fileOutputStream.close();
            } catch (IOException e3) {
            }
            throw th;
        }
    }
}
