package org.apache.nutch.indexer.extra;

import java.util.List;
import org.apache.commons.jexl.ExpressionFactory;
import org.apache.commons.jexl.JexlContext;
import org.apache.commons.jexl.JexlHelper;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.UTF8;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.parse.Parse;
import org.apache.regexp.RE;

/* loaded from: input_file:org/apache/nutch/indexer/extra/ExtraIndexingFilter.class */
/**
 * Indexing filter that adds extra, configuration-driven fields to a Lucene document.
 *
 * <p>The field definitions are obtained from {@link ExtraIndexingFilterConfig}. For every
 * definition the optional JEXL condition is evaluated; when it holds, the source value is
 * resolved, optionally reduced by a regular expression, and added to the document as a new
 * field with the configured index/store/term-vector options and boost.
 *
 * <p>Failures in a single definition (bad expression, bad regex, bad boost) are logged and
 * cause that field to be skipped or left with its default setting; they do not abort the
 * processing of the document.
 */
public class ExtraIndexingFilter implements IndexingFilter {
    public static final Log LOG = LogFactory.getLog(ExtraIndexingFilter.class);
    private Configuration conf;

    /**
     * Applies every configured extra-field definition to the given document.
     *
     * @param document   the document being indexed; extra fields are added to it in place
     * @param parse      the parse result supplying text and metadata
     * @param utf8       not used by this filter
     * @param crawlDatum not used by this filter
     * @param inlinks    not used by this filter
     * @return the same {@code document} instance that was passed in
     * @throws IndexingException declared by the interface; not thrown directly by this method
     */
    public Document filter(Document document, Parse parse, UTF8 utf8, CrawlDatum crawlDatum, Inlinks inlinks) throws IndexingException {
        List extraIndexerProperties = ExtraIndexingFilterConfig.getInstance().getExtraIndexerProperties();
        if (extraIndexerProperties == null) {
            return document;
        }
        for (int i = 0; i < extraIndexerProperties.size(); i++) {
            ExtraIndexingMetaData extraIndexingMetaData = (ExtraIndexingMetaData) extraIndexerProperties.get(i);
            if (isTrue(document, parse, extraIndexingMetaData.getCondition()).booleanValue()) {
                addField(document, parse, extraIndexingMetaData);
            }
        }
        return document;
    }

    /**
     * Builds the JEXL context shared by condition and source-value expressions.
     *
     * <p>The parse data is published under both {@code meta} and {@code data}: historically
     * conditions used the former and source expressions the latter, so exposing both keeps
     * existing expressions working while letting either name be used in either place.
     */
    private JexlContext createJexlContext(Document document, Parse parse) {
        JexlContext context = JexlHelper.createContext();
        context.getVars().put("document", document);
        context.getVars().put("meta", parse.getData());
        context.getVars().put("data", parse.getData());
        context.getVars().put("text", parse.getText());
        return context;
    }

    /**
     * Evaluates a JEXL condition.
     *
     * @param str the condition expression; {@code null} or empty means "always true"
     * @return {@code TRUE} when there is no condition or it evaluates to {@code Boolean.TRUE};
     *         {@code FALSE} when it evaluates to anything else or evaluation fails
     */
    private Boolean isTrue(Document document, Parse parse, String str) {
        if (str == null || str.length() == 0) {
            return Boolean.TRUE;
        }
        try {
            Object result = ExpressionFactory.createExpression(str).evaluate(createJexlContext(document, parse));
            if (result instanceof Boolean) {
                return (Boolean) result;
            }
            // A null or non-boolean result cannot be interpreted as a condition.
            LOG.error("Condition did not evaluate to a boolean: " + str);
            return Boolean.FALSE;
        } catch (Exception e) {
            LOG.error("Exception evaluating condition: " + str, e);
            return Boolean.FALSE;
        }
    }

    /**
     * Resolves the value for one field definition and, if a value results, adds the field.
     *
     * @return the same {@code document} instance that was passed in
     */
    private Document addField(Document document, Parse parse, ExtraIndexingMetaData extraIndexingMetaData) {
        String value = applyRegex(getSourceValue(document, parse, extraIndexingMetaData), extraIndexingMetaData);
        if (value == null) {
            return document;
        }
        Field.Index index = parseIndex(extraIndexingMetaData.getIndexed());
        Field.Store store = parseStore(extraIndexingMetaData.getStored());
        Field.TermVector termVector = parseTermVector(extraIndexingMetaData.getTermVector());
        Field field = new Field(extraIndexingMetaData.getName(), value, store, index, termVector);

        String boost = extraIndexingMetaData.getBoost();
        if (boost != null && !boost.equals("1.0")) {
            try {
                field.setBoost(Float.parseFloat(boost));
            } catch (NumberFormatException e) {
                // A malformed boost must not abort indexing of the whole document.
                LOG.error("Invalid boost '" + boost + "' for field " + extraIndexingMetaData.getName()
                        + "; boost left unchanged", e);
            }
        }
        LOG.info("Adding field " + extraIndexingMetaData.getName());
        document.add(field);
        return document;
    }

    /** Maps the configured "indexed" attribute to a Lucene constant; falls back to TOKENIZED. */
    private Field.Index parseIndex(String indexed) {
        // Constant-first, case-insensitive comparison: null-safe, and accepts the lower-case
        // spellings that the error message below advertises as valid.
        if ("TOKENIZED".equalsIgnoreCase(indexed)) {
            return Field.Index.TOKENIZED;
        }
        if ("UN_TOKENIZED".equalsIgnoreCase(indexed)) {
            return Field.Index.UN_TOKENIZED;
        }
        if ("NO".equalsIgnoreCase(indexed)) {
            return Field.Index.NO;
        }
        LOG.error("Invalid indexed attribute.  Valid values are 'tokenized', 'un_tokenized' or 'no'");
        return Field.Index.TOKENIZED;
    }

    /** Maps the configured "stored" attribute to a Lucene constant; falls back to YES. */
    private Field.Store parseStore(String stored) {
        if ("YES".equalsIgnoreCase(stored)) {
            return Field.Store.YES;
        }
        if ("COMPRESS".equalsIgnoreCase(stored)) {
            return Field.Store.COMPRESS;
        }
        if ("NO".equalsIgnoreCase(stored)) {
            return Field.Store.NO;
        }
        LOG.error("Invalid stored attribute.  Valid values are 'yes', 'compress' or 'no'");
        return Field.Store.YES;
    }

    /** Maps the configured "termVector" attribute to a Lucene constant; falls back to NO. */
    private Field.TermVector parseTermVector(String termVector) {
        if ("YES".equalsIgnoreCase(termVector)) {
            return Field.TermVector.YES;
        }
        if ("WITH_OFFSETS".equalsIgnoreCase(termVector)) {
            return Field.TermVector.WITH_OFFSETS;
        }
        if ("WITH_POSITIONS".equalsIgnoreCase(termVector)) {
            return Field.TermVector.WITH_POSITIONS;
        }
        if ("WITH_POSITIONS_OFFSETS".equalsIgnoreCase(termVector)) {
            return Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        if ("NO".equalsIgnoreCase(termVector)) {
            return Field.TermVector.NO;
        }
        LOG.error("Invalid termVector attribute.  Valid values are 'yes', 'with_positions', 'with_offsets', 'with_positions_offsets' or 'no'");
        return Field.TermVector.NO;
    }

    /**
     * Resolves the raw value of a field according to its source type.
     *
     * @return the resolved value, or {@code null} when the source yields nothing, the JEXL
     *         expression fails, or the source type is not recognised
     */
    private String getSourceValue(Document document, Parse parse, ExtraIndexingMetaData extraIndexingMetaData) {
        switch (extraIndexingMetaData.getSourceType()) {
            case CONST:
                return extraIndexingMetaData.getSourceValue();
            case FIELD:
                return document.get(extraIndexingMetaData.getSourceValue());
            case JAVA: {
                String expression = extraIndexingMetaData.getSourceValue();
                try {
                    Object result = ExpressionFactory.createExpression(expression).evaluate(createJexlContext(document, parse));
                    // An expression may legitimately evaluate to null; treat it as "no value".
                    return result == null ? null : result.toString();
                } catch (Exception e) {
                    LOG.error("Exception evaluating expression: " + expression, e);
                    return null;
                }
            }
            case META:
                return parse.getData().getMeta(extraIndexingMetaData.getSourceValue());
            case TEXT:
                return parse.getText();
            default:
                LOG.error("Invalid source type: " + extraIndexingMetaData.getSourceType());
                return null;
        }
    }

    /**
     * Applies the definition's regular expression, if any, to the source value.
     *
     * @param str the source value; may be {@code null}
     * @return {@code str} unchanged when it is {@code null} or no regex is configured;
     *         otherwise the configured capture group when the regex matches, or
     *         {@code null} when it does not match or the regex cannot be applied
     */
    private String applyRegex(String str, ExtraIndexingMetaData extraIndexingMetaData) {
        if (str == null) {
            return null;
        }
        String regexValue = extraIndexingMetaData.getRegexValue();
        if (regexValue == null) {
            return str;
        }
        try {
            RE re = new RE(regexValue);
            return re.match(str) ? re.getParen(extraIndexingMetaData.getRegexNo()) : null;
        } catch (Exception e) {
            LOG.error("Exception incorrect regex: " + regexValue, e);
            return null;
        }
    }

    /** Stores the configuration supplied by the framework. */
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /** Returns the configuration previously passed to {@link #setConf(Configuration)}. */
    public Configuration getConf() {
        return this.conf;
    }
}
