package liuslite.parser.html;

import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import liuslite.config.Content;
import liuslite.parser.Parser;
import liuslite.utils.RegexUtils;
import org.apache.log4j.Logger;
import org.apache.oro.text.regex.MalformedPatternException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:liuslite/parser/html/HtmlParser.class */
/**
 * {@link Parser} implementation for HTML input.
 *
 * <p>The input stream is parsed into a DOM with JTidy. The concatenated text of
 * that DOM is cached and used to populate the {@link Content} entries returned
 * by the parser configuration, either as the full text, as the text of elements
 * looked up by tag name, or as regular-expression matches over the full text.
 */
public class HtmlParser extends Parser {
    static Logger logger = Logger.getRootLogger();

    /** Value of {@code textSelect} (case-insensitive) that requests the whole document text. */
    private static final String FULL_TEXT_SELECTOR = "fulltext";

    /** Document element of the parsed input; {@code null} until first parsed or if parsing yields none. */
    private Node root = null;

    /** Cached concatenated text of the document. */
    private String contentStr;

    /** Contents processed by the last {@link #getContents()} call, keyed by content name. */
    private Map<String, Content> contentsMap;

    /**
     * Returns the populated content registered under the given name.
     *
     * @param str the content name to look up
     * @return the matching content, or {@code null} if no content has that name
     */
    @Override // liuslite.parser.Parser
    public Content getContent(String str) {
        // Lazily run the extraction the first time a single content is requested.
        if (this.contentsMap == null || this.contentsMap.isEmpty()) {
            getContents();
        }
        return this.contentsMap.get(str);
    }

    /**
     * Populates every content of the parser configuration from the document and
     * rebuilds the name-to-content map.
     *
     * <p>A content with a text selector receives either the full text (selector
     * {@code "fulltext"}) or the text of the elements whose tag name equals the
     * content name. Otherwise, a content with a regex selector receives the
     * matches of that expression over the full text. A malformed expression is
     * logged and leaves the content without a value.
     *
     * @return the contents of the parser configuration
     */
    @Override // liuslite.parser.Parser
    public List<Content> getContents() {
        if (this.contentStr == null) {
            this.contentStr = getStrContent();
        }
        List<Content> contents = getParserConfig().getContents();
        this.contentsMap = new HashMap<String, Content>();
        for (Content content : contents) {
            String textSelect = content.getTextSelect();
            if (textSelect != null) {
                if (FULL_TEXT_SELECTOR.equalsIgnoreCase(textSelect)) {
                    content.setValue(this.contentStr);
                } else {
                    extractElementTxt((Element) this.root, content);
                }
            } else if (content.getRegexSelect() != null) {
                applyRegex(content);
            }
            this.contentsMap.put(content.getName(), content);
        }
        return getParserConfig().getContents();
    }

    /**
     * Parses the input stream on first use and returns the concatenated text of
     * the document.
     *
     * @return the document text; empty if the document has no root element
     */
    @Override // liuslite.parser.Parser
    public String getStrContent() {
        if (this.root == null) {
            this.root = getRoot(getInputStream());
        }
        this.contentStr = getTextContent(this.root);
        return this.contentStr;
    }

    /** Sets the value(s) of {@code content} from its regex selector applied to the full text. */
    private void applyRegex(Content content) {
        try {
            List<String> matches = RegexUtils.extract(this.contentStr, content.getRegexSelect());
            if (matches != null && !matches.isEmpty()) {
                content.setValue(matches.get(0));
                content.setValues(matches.toArray(new String[0]));
            }
        } catch (MalformedPatternException e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Parses the stream with a quiet JTidy instance.
     *
     * @return the document element, or {@code null} if no document was produced
     */
    private Node getRoot(InputStream inputStream) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        // The cast keeps the (InputStream, OutputStream) overload unambiguous.
        Document document = tidy.parseDOM(inputStream, (OutputStream) null);
        return document == null ? null : document.getDocumentElement();
    }

    /**
     * Fills {@code content} from the elements under {@code element} whose tag
     * name equals the content name.
     *
     * <p>Only elements whose first child is a text node contribute. The value is
     * the first text found; when more than one element matched the tag name,
     * the values array additionally receives every text found, sized exactly.
     * If nothing is found the content is left untouched.
     */
    private void extractElementTxt(Element element, Content content) {
        if (element == null) {
            return;
        }
        NodeList matches = element.getElementsByTagName(content.getName());
        if (matches == null || matches.getLength() == 0) {
            return;
        }
        int matchCount = matches.getLength();
        List<String> texts = new ArrayList<String>(matchCount);
        for (int i = 0; i < matchCount; i++) {
            // The first child may be absent or another element; only text is usable.
            Node firstChild = matches.item(i).getFirstChild();
            if (firstChild instanceof Text) {
                texts.add(((Text) firstChild).getData());
            }
        }
        if (texts.isEmpty()) {
            return;
        }
        content.setValue(texts.get(0));
        if (matchCount > 1) {
            content.setValues(texts.toArray(new String[0]));
        }
    }

    /**
     * Recursively concatenates the text nodes below {@code node}, appending a
     * single space after the text of each child element.
     *
     * @return the concatenated text; empty if {@code node} is {@code null}
     */
    private String getTextContent(Node node) {
        if (node == null) {
            return "";
        }
        NodeList childNodes = node.getChildNodes();
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node item = childNodes.item(i);
            switch (item.getNodeType()) {
                case Node.ELEMENT_NODE:
                    text.append(getTextContent(item));
                    text.append(" ");
                    break;
                case Node.TEXT_NODE:
                    text.append(((Text) item).getData());
                    break;
                default:
                    // Comments, processing instructions, etc. carry no extracted text.
                    break;
            }
        }
        return text.toString();
    }
}
