package morfologik.tools;

import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import morfologik.fsa.CFSA2Serializer;
import morfologik.fsa.FSA;
import morfologik.fsa.FSA5Serializer;
import morfologik.fsa.FSABuilder;
import morfologik.fsa.FSAFlags;
import morfologik.fsa.FSAInfo;
import morfologik.fsa.FSASerializer;
import morfologik.fsa.FSAUtils;
import morfologik.fsa.IMessageLogger;
import morfologik.fsa.StateVisitor;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.StringEscapeUtils;

/* loaded from: input_file:morfologik/tools/FSABuildTool.class */
public final class FSABuildTool extends Tool {
    /** One mebibyte (2^20), in bytes. */
    private static final int MB = 1048576;
    /** When set, {@code forAllLines} periodically logs the elapsed time and the number of lines read. */
    private boolean printProgress;
    /** Serializer obtained from {@link #format} in {@code parseOptions}; writes the automaton in {@code go}. */
    private FSASerializer serializer;
    /** Output format; {@code parseOptions} takes it from the command line and falls back to {@code FSA5}. */
    private Format format;
    /** When set, {@code go} feeds the input straight to the builder instead of sorting it in memory first. */
    private boolean inputSorted;
    /** When set, {@code go} logs automaton statistics before serializing. */
    private boolean statistics;
    /** Message sink; created in {@code go} on top of {@code System.err}. */
    private IMessageLogger logger;
    /** Set by {@code forAllLines} once a carriage return byte (13) has been read; {@code go} then logs a warning. */
    private boolean crWarning = false;
    /** Builder that receives every input sequence and produces the automaton. */
    private FSABuilder builder = new FSABuilder();
    /** Wall-clock time (milliseconds) at which this instance was created; the base for {@code elapsedTime()}. */
    private long start = System.currentTimeMillis();

    /* loaded from: input_file:morfologik/tools/FSABuildTool$Format.class */
    /** Output formats this tool can write. */
    public enum Format {
        FSA5,
        CFSA2;

        /**
         * Creates a new serializer for this format.
         *
         * @return a new {@link FSA5Serializer} for {@code FSA5} or a new
         *         {@link CFSA2Serializer} for {@code CFSA2}
         */
        public FSASerializer getSerializer() {
            if (this == FSA5) {
                return new FSA5Serializer();
            }
            if (this == CFSA2) {
                return new CFSA2Serializer();
            }
            // Only reachable if a constant is added without a serializer mapping.
            throw new RuntimeException();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:morfologik/tools/FSABuildTool$LineConsumer.class */
    public interface LineConsumer {
        byte[] process(byte[] bArr, int i);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:morfologik/tools/FSABuildTool$TerminateProgramException.class */
    /**
     * Signals that the program should stop with the given message.
     *
     * <p>No stack trace is recorded for this exception: {@link #fillInStackTrace()} is
     * overridden to do nothing.
     */
    public static class TerminateProgramException extends RuntimeException {
        /**
         * @param str the message describing why the program terminates
         */
        public TerminateProgramException(String str) {
            super(str);
        }

        /**
         * Skips stack trace capture.
         *
         * @return this exception, as required by the {@link Throwable#fillInStackTrace()}
         *         contract (returning {@code null} would break callers that chain on the result)
         */
        @Override // java.lang.Throwable
        public synchronized Throwable fillInStackTrace() {
            return this;
        }
    }

    /**
     * Runs the tool: parses the options, reads the input, builds the automaton,
     * optionally logs statistics and serializes the result.
     *
     * <p>If any positional arguments are present the usage text is printed and nothing
     * else happens. An {@link OutOfMemoryError} raised while working is reported through
     * the logger instead of being propagated.
     *
     * @param commandLine the parsed command line
     * @throws Exception if reading, building or writing fails
     */
    @Override // morfologik.tools.Tool
    protected void go(CommandLine commandLine) throws Exception {
        if (commandLine.getArgs().length != 0) {
            printUsage();
            return;
        }
        parseOptions(commandLine);
        this.logger = new WriterMessageLogger(new PrintWriter(System.err));
        this.serializer.withLogger(this.logger);
        try {
            final FSA fsa = buildAutomaton(commandLine);
            if (this.crWarning) {
                this.logger.log("Warning: input contained carriage returns?");
            }
            if (this.statistics) {
                logStatistics(fsa);
            }
            this.logger.startPart("Serializing " + this.format);
            // try-with-resources guarantees the output is closed even when serialization
            // fails; the stream returned by the serializer is still closed explicitly on
            // the success path, exactly as before.
            try (OutputStream output = initializeOutput(commandLine)) {
                this.serializer.serialize(fsa, output).close();
            }
            this.logger.endPart();
        } catch (OutOfMemoryError e) {
            this.logger.log("Error: Out of memory. Pass -Xmx1024m argument (or more) to java.");
        }
    }

    /**
     * Opens the input, builds the automaton from it and closes the input again
     * (this includes standard input when no input file was given).
     */
    private FSA buildAutomaton(CommandLine commandLine) throws IOException, ParseException {
        try (InputStream input = initializeInput(commandLine)) {
            if (this.inputSorted) {
                this.logger.log("Assuming input is already sorted");
            }
            return this.inputSorted ? processSortedInput(input) : processUnsortedInput(input);
        }
    }

    /** Logs node/arc counts, the fan-out histogram and the builder's properties for the given automaton. */
    private void logStatistics(final FSA fsa) {
        this.logger.startPart("Statistics");
        final FSAInfo info = new FSAInfo(fsa);
        final Map<?, ?> fanOuts = FSAUtils.calculateFanOuts(fsa, fsa.getRootNode());
        this.logger.endPart();

        // For every state: final arcs leaving it plus the totals already stored for the
        // targets of its non-terminal arcs. Post-order guarantees the targets come first.
        final IntIntOpenHashMap totalsByState = new IntIntOpenHashMap();
        fsa.visitInPostOrder(new StateVisitor() {
            public boolean accept(int state) {
                int total = 0;
                for (int arc = fsa.getFirstArc(state); arc != 0; arc = fsa.getNextArc(arc)) {
                    total += (fsa.isArcFinal(arc) ? 1 : 0)
                            + (fsa.isArcTerminal(arc) ? 0 : totalsByState.get(fsa.getEndNode(arc)));
                }
                totalsByState.put(state, total);
                return true;
            }
        });

        // States whose total is exactly one are reported as "tail nodes".
        int tailNodes = 0;
        for (IntIntCursor cursor : totalsByState) {
            if (cursor.value == 1) {
                tailNodes++;
            }
        }

        this.logger.log("Nodes", Integer.valueOf(info.nodeCount));
        this.logger.log("Arcs", Integer.valueOf(info.arcsCount));
        this.logger.log("Tail nodes", Integer.valueOf(tailNodes));
        this.logger.log("States with the given # of outgoing arcs:");
        for (Map.Entry<?, ?> fanOut : fanOuts.entrySet()) {
            this.logger.log("  #" + fanOut.getKey(), fanOut.getValue());
        }
        this.logger.log("FSA builder properties:");
        for (Map.Entry<?, ?> property : this.builder.getInfo().entrySet()) {
            this.logger.log(property.getKey().toString(), property.getValue());
        }
    }

    /**
     * Builds the automaton from input in arbitrary order: all sequences are read into
     * memory, sorted with {@code FSABuilder.LEXICAL_ORDERING} and then added to the builder.
     *
     * @param inputStream the stream to read sequences from
     * @return the completed automaton
     * @throws IOException if reading the input fails
     */
    private FSA processUnsortedInput(InputStream inputStream) throws IOException {
        this.logger.startPart("Reading input");
        final ArrayList<byte[]> sequences = readInput(inputStream);
        this.logger.endPart();
        this.logger.log("Input sequences", Integer.valueOf(sequences.size()));

        this.logger.startPart("Sorting");
        Collections.sort(sequences, FSABuilder.LEXICAL_ORDERING);
        this.logger.endPart();

        this.logger.startPart("Building FSA");
        for (byte[] sequence : sequences) {
            this.builder.add(sequence, 0, sequence.length);
        }
        final FSA automaton = this.builder.complete();
        this.logger.endPart();
        return automaton;
    }

    /**
     * Builds the automaton from input that is expected to be sorted already. Each line
     * is compared with the one before it and added to the builder right away.
     *
     * @param inputStream the stream to read sequences from
     * @return the completed automaton
     * @throws IOException if reading the input fails
     * @throws TerminateProgramException if a line compares lower than its predecessor
     */
    private FSA processSortedInput(InputStream inputStream) throws IOException {
        final LineConsumer orderCheckingFeeder = new LineConsumer() {
            private byte[] lastBuffer = null;
            private int lastLength;
            private int lineNumber;

            @Override // morfologik.tools.FSABuildTool.LineConsumer
            public byte[] process(byte[] bArr, int i) {
                this.lineNumber++;
                final boolean outOfOrder = this.lastBuffer != null
                        && FSABuilder.compare(this.lastBuffer, 0, this.lastLength, bArr, 0, i) > 0;
                if (outOfOrder) {
                    FSABuildTool.this.logger.log("\n\nERROR: The input is not sorted: \n" + FSABuildTool.this.dumpLine(this.lastBuffer, this.lastLength) + "\n" + FSABuildTool.this.dumpLine(bArr, i));
                    throw new TerminateProgramException("Input is not sorted.");
                }
                FSABuildTool.this.builder.add(bArr, 0, i);

                // Two buffers alternate: the array that held the previous line is handed
                // back to the reader while the current one is kept for the next comparison.
                final byte[] recycled;
                if (this.lastBuffer == null) {
                    recycled = new byte[bArr.length];
                } else {
                    recycled = this.lastBuffer;
                }
                this.lastBuffer = bArr;
                this.lastLength = i;
                return recycled;
            }
        };
        final int sequenceCount = forAllLines(inputStream, orderCheckingFeeder);

        this.logger.startPart("Building FSA");
        final FSA automaton = this.builder.complete();
        this.logger.endPart();
        this.logger.log("Input sequences", Integer.valueOf(sequenceCount));
        return automaton;
    }

    /**
     * Renders the first {@code i} bytes of {@code bArr} as space-separated two-digit hex
     * values, followed by {@code " | "} and a text column in which every byte that is not
     * a letter or digit is shown as a dot.
     *
     * @param bArr the bytes to render
     * @param i    the number of bytes to render, counted from index 0
     * @return the formatted line
     */
    protected String dumpLine(byte[] bArr, int i) {
        final StringBuilder hexColumn = new StringBuilder();
        final StringBuilder textColumn = new StringBuilder();
        for (int pos = 0; pos < i; pos++) {
            final byte value = bArr[pos];
            if (pos != 0) {
                hexColumn.append(' ');
            }
            hexColumn.append(String.format("%02x", value));
            // The byte is widened to an int code point, so negative bytes never count as letters.
            textColumn.append(Character.isLetterOrDigit(value) ? (char) value : '.');
        }
        return hexColumn.append(" | ").append(textColumn).toString();
    }

    /**
     * Transfers the command line options into this tool's fields and configures the serializer.
     *
     * @param commandLine the parsed command line
     * @throws TerminateProgramException if the requested output format is not a {@link Format} constant
     */
    private void parseOptions(CommandLine commandLine) {
        final String formatOption = SharedOptions.outputFormatOption.getOpt();
        if (!commandLine.hasOption(formatOption)) {
            this.format = Format.FSA5;
        } else {
            final String requested = commandLine.getOptionValue(formatOption);
            try {
                this.format = Format.valueOf(requested.toUpperCase());
            } catch (IllegalArgumentException e) {
                throw new TerminateProgramException("Not a valid format: " + requested);
            }
        }
        this.serializer = this.format.getSerializer();

        final String fillerOption = SharedOptions.fillerCharacterOption.getLongOpt();
        if (commandLine.hasOption(fillerOption) && requiredCapability(fillerOption, FSAFlags.SEPARATORS)) {
            this.serializer.withFiller(singleByteArgument(commandLine, fillerOption));
        }

        final String separatorOption = SharedOptions.annotationSeparatorCharacterOption.getLongOpt();
        if (commandLine.hasOption(separatorOption) && requiredCapability(separatorOption, FSAFlags.SEPARATORS)) {
            this.serializer.withAnnotationSeparator(singleByteArgument(commandLine, separatorOption));
        }

        final String numbersOption = SharedOptions.withNumbersOption.getOpt();
        if (commandLine.hasOption(numbersOption) && requiredCapability(numbersOption, FSAFlags.NUMBERS)) {
            this.serializer.withNumbers();
        }

        // The flags are only ever switched on here, never cleared.
        if (commandLine.hasOption(SharedOptions.progressOption.getLongOpt())) {
            this.printProgress = true;
        }
        if (commandLine.hasOption(SharedOptions.inputSortedOption.getLongOpt())) {
            this.inputSorted = true;
        }
        if (commandLine.hasOption(SharedOptions.statistics.getLongOpt())) {
            this.statistics = true;
        }
    }

    /** Un-escapes the option's value, verifies it encodes to a single byte and returns that byte. */
    private static byte singleByteArgument(CommandLine commandLine, String option) {
        final String unescaped = StringEscapeUtils.unescapeJava(commandLine.getOptionValue(option));
        checkSingleByte(unescaped);
        return unescaped.getBytes()[0];
    }

    /**
     * Verifies that the current serializer advertises the given flag.
     *
     * @param str      name of the option that needs the capability; used in the error message
     * @param fSAFlags the flag the serializer must report
     * @return always {@code true}; the method never returns {@code false}
     * @throws RuntimeException if the serializer's flags do not contain {@code fSAFlags}
     */
    private boolean requiredCapability(String str, FSAFlags fSAFlags) {
        final boolean supported = this.serializer.getFlags().contains(fSAFlags);
        if (!supported) {
            throw new RuntimeException("This serializer does not support option: " + str);
        }
        return true;
    }

    /**
     * Ensures the string encodes to exactly one byte in the platform's default charset.
     *
     * @param str the value to check
     * @throws IllegalArgumentException if the encoded form is not exactly one byte long
     */
    public static void checkSingleByte(String str) {
        final int byteCount = str.getBytes().length;
        if (byteCount == 1) {
            return;
        }
        throw new IllegalArgumentException("Filler and annotation characters must be single-byte values, " + str + " has " + byteCount + " bytes.");
    }

    /**
     * Reads every non-empty line of the stream into memory.
     *
     * @param inputStream the stream to read
     * @return the lines in input order, each as its own array of exactly the line's length
     * @throws IOException if reading fails
     */
    private ArrayList<byte[]> readInput(InputStream inputStream) throws IOException {
        final ArrayList<byte[]> lines = new ArrayList<>();
        final LineConsumer collector = new LineConsumer() {
            @Override // morfologik.tools.FSABuildTool.LineConsumer
            public byte[] process(byte[] bArr, int i) {
                // Store a trimmed copy so the reader can keep filling the same buffer.
                final byte[] line = Arrays.copyOf(bArr, i);
                lines.add(line);
                return bArr;
            }
        };
        forAllLines(inputStream, collector);
        return lines;
    }

    /**
     * Splits the stream into lines at line feed bytes and passes every non-empty line to
     * the consumer. A trailing line without a final line feed is passed on as well.
     *
     * <p>Carriage return bytes are kept as part of the line; seeing one only sets
     * {@code crWarning}. When {@code printProgress} is set, a progress message is logged
     * every million line feeds.
     *
     * @param inputStream  the stream to read, one byte at a time
     * @param lineConsumer receives each line; the array it returns becomes the buffer for
     *                     the next line
     * @return the number of lines passed to the consumer
     * @throws IOException if reading fails
     */
    private int forAllLines(InputStream inputStream, LineConsumer lineConsumer) throws IOException {
        int consumed = 0;
        int lineFeeds = 0;
        byte[] buffer = new byte[0];
        int length = 0;
        int b;
        while ((b = inputStream.read()) != -1) {
            if (b == '\r') {
                this.crWarning = true;
            }
            if (b == '\n') {
                // Empty lines are skipped but still count towards the progress report.
                if (length > 0) {
                    buffer = lineConsumer.process(buffer, length);
                    length = 0;
                    consumed++;
                }
                if (this.printProgress) {
                    lineFeeds++;
                    if (lineFeeds > 0 && lineFeeds % 1000000 == 0) {
                        this.logger.log(String.format(Locale.ENGLISH, "%6.2fs, sequences: %d", Double.valueOf(elapsedTime()), Integer.valueOf(lineFeeds)));
                    }
                }
            } else {
                if (length >= buffer.length) {
                    // Grow geometrically so that a long line costs amortized constant
                    // time per byte instead of one copy every ten bytes.
                    buffer = Arrays.copyOf(buffer, Math.max(16, buffer.length * 2));
                }
                buffer[length++] = (byte) b;
            }
        }
        if (length > 0) {
            lineConsumer.process(buffer, length);
            consumed++;
        }
        return consumed;
    }

    /**
     * @return the seconds elapsed since this tool instance was created
     */
    private double elapsedTime() {
        final long elapsedMillis = System.currentTimeMillis() - this.start;
        return elapsedMillis / 1000.0d;
    }

    /** Prints the help text for this tool's options, titled with this class's name and including an auto-generated usage line. */
    @Override // morfologik.tools.Tool
    protected void printUsage() {
        final HelpFormatter formatter = new HelpFormatter();
        final String commandName = getClass().getName();
        formatter.printHelp(commandName, this.options, true);
    }

    /**
     * Registers every option this tool understands.
     *
     * @param options the option set to add to
     */
    @Override // morfologik.tools.Tool
    protected void initializeOptions(Options options) {
        options
                .addOption(SharedOptions.inputFileOption)
                .addOption(SharedOptions.outputFileOption)
                .addOption(SharedOptions.outputFormatOption)
                .addOption(SharedOptions.fillerCharacterOption)
                .addOption(SharedOptions.annotationSeparatorCharacterOption)
                .addOption(SharedOptions.withNumbersOption)
                .addOption(SharedOptions.progressOption)
                .addOption(SharedOptions.inputSortedOption)
                .addOption(SharedOptions.statistics);
    }

    /**
     * Opens the destination for the serialized automaton: the file given by the output
     * option when present, standard output otherwise.
     *
     * @param commandLine the parsed command line
     * @return a buffered stream over the chosen destination
     * @throws IOException    if the output file cannot be opened
     * @throws ParseException if the option value cannot be converted
     */
    private static OutputStream initializeOutput(CommandLine commandLine) throws IOException, ParseException {
        final String outputOption = SharedOptions.outputFileOption.getOpt();
        final OutputStream destination;
        if (commandLine.hasOption(outputOption)) {
            final File outputFile = (File) commandLine.getParsedOptionValue(outputOption);
            destination = new FileOutputStream(outputFile);
        } else {
            destination = System.out;
        }
        return new BufferedOutputStream(destination);
    }

    /**
     * Opens the source of input sequences: the file given by the input option when
     * present, standard input otherwise. For unsorted input files larger than 20 MB a
     * hint about pre-sorting is logged.
     *
     * @param commandLine the parsed command line
     * @return a buffered stream over the chosen source
     * @throws IOException    if the input file cannot be opened
     * @throws ParseException if the option value cannot be converted
     */
    private InputStream initializeInput(CommandLine commandLine) throws IOException, ParseException {
        final String inputOption = SharedOptions.inputFileOption.getOpt();
        if (!commandLine.hasOption(inputOption)) {
            return new BufferedInputStream(System.in);
        }

        final File inputFile = (File) commandLine.getParsedOptionValue(inputOption);
        if (!this.inputSorted && inputFile.length() > 20 * MB) {
            this.logger.log("WARN: The input file is quite large, avoid\n"
                    + "      in-memory sorting by piping pre-sorted\n"
                    + "      input directly to fsa_build. Linux:\n"
                    + "      export LC_ALL=C && \\\n"
                    + "         sort input | \\\n"
                    + "         java -jar morfologik.jar fsa_build --sorted -o dict.fsa");
        }
        return new BufferedInputStream(new FileInputStream(inputFile));
    }

    /**
     * Command line entry point.
     *
     * @param strArr the raw command line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        final FSABuildTool tool = new FSABuildTool();
        tool.go(strArr);
    }
}
