/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.emje.treehtmlparse.parser;
import java.util.ArrayList;
import java.util.Enumeration;
import javax.swing.tree.DefaultMutableTreeNode;
import javax.xml.namespace.QName;
import javax.xml.xpath.XPathConstants;
import org.emje.treehtmlparse.xpathtree.XPathTreeConfigInitialNodes;
import org.emje.treehtmlparse.xpathtree.nodes.XPathTreeConfigNode;
import org.emje.treehtmlparse.xpathtree.nodes.XPathTreeEntryNode;
import org.emje.treehtmlparse.xpathtree.nodes.XPathTreeFieldNode;
import org.w3c.dom.NodeList;
/**
* Creating DefaultMuttableTreeNode from XPath tree and HtmlParser for displaying the result from applying each XPath Tree node to the web page source code.
* @author shadiq
*/
public class ConfigurationResult extends DefaultMutableTreeNode {
private HtmlParse parser;
public ConfigurationResult(HtmlParse parser){
this.parser = parser;
}
public DefaultMutableTreeNode getResult(XPathTreeConfigInitialNodes tree){
DefaultMutableTreeNode rootNode = null;
DefaultMutableTreeNode currentFieldNode = null;
// Get xPathTree Structure and data
// Enumeration en = tree.postorderEnumeration();
Enumeration en = tree.preorderEnumeration();
while (en.hasMoreElements()) {
DefaultMutableTreeNode node = (DefaultMutableTreeNode) en.nextElement();
String objectName = node.getUserObject().getClass().getName();
// if config
if (objectName.matches("org.emje.treehtmlparse.xpathtree.nodes.XPathTreeConfigNode")) {
XPathTreeConfigNode conf = (XPathTreeConfigNode) node.getUserObject();
rootNode = new DefaultMutableTreeNode(conf.getName());
// System.out.println(conf.getName());
// if field
} else if (objectName.matches("org.emje.treehtmlparse.xpathtree.nodes.XPathTreeFieldNode")) {
XPathTreeFieldNode field = (XPathTreeFieldNode) node.getUserObject();
currentFieldNode = new DefaultMutableTreeNode(field.getFieldName());
StringBuilder sb = new StringBuilder();
Enumeration enu = node.preorderEnumeration();
if (field.getFieldType().equals(XPathTreeFieldNode.CONTINUOUS_TEXT)) {
// System.out.println("continous");
while (enu.hasMoreElements()) {
DefaultMutableTreeNode n = (DefaultMutableTreeNode) enu.nextElement();
if (n.getUserObject().getClass().getName().
matches("org.emje.treehtmlparse.xpathtree.nodes.XPathTreeEntryNode")){
XPathTreeEntryNode entry = (XPathTreeEntryNode) n.getUserObject();
sb.append(evaluateXPath(parser, entry.getxPathExpression(), entry.getExpressionType(),
field.getFieldType(), field.getEntryDelimiter()));
}
}
sb.delete(sb.length()-field.getEntryDelimiter().length(), sb.length()); // remove the last delimiter
currentFieldNode.add(new DefaultMutableTreeNode(sb.toString().trim()));
} else if (field.getFieldType().equals(XPathTreeFieldNode.SEGMENTED_TEXT)) {
// System.out.println("segmented");
while (enu.hasMoreElements()) {
DefaultMutableTreeNode n = (DefaultMutableTreeNode) enu.nextElement();
if (n.getUserObject().getClass().getName().
matches("org.emje.treehtmlparse.xpathtree.nodes.XPathTreeEntryNode")){
XPathTreeEntryNode entry = (XPathTreeEntryNode) n.getUserObject();
String res = evaluateXPath(parser, entry.getxPathExpression(), entry.getExpressionType(),
field.getFieldType(), field.getEntryDelimiter());
currentFieldNode.add(new DefaultMutableTreeNode(res));
// System.out.println(res);
}
}
} else if (field.getFieldType().equals(XPathTreeFieldNode.OUTLINKS)) {
// System.out.println("outlinks");
sb.append("
");
while (enu.hasMoreElements()) {
DefaultMutableTreeNode n = (DefaultMutableTreeNode) enu.nextElement();
if (n.getUserObject().getClass().getName().
matches("org.emje.treehtmlparse.xpathtree.nodes.XPathTreeEntryNode")) {
XPathTreeEntryNode entry = (XPathTreeEntryNode) n.getUserObject();
sb.append(evaluateXPath(parser, entry.getxPathExpression(), entry.getExpressionType(),
field.getFieldType(), field.getEntryDelimiter()));
}
}
sb.append("
");
// System.out.println(sb.toString());
currentFieldNode.add(new DefaultMutableTreeNode(sb.toString()));
}
rootNode.add(currentFieldNode);
} //if entry IGNORE
}
return rootNode;
}
private String evaluateXPath(HtmlParse parser, String expression,
String expressionType, String fieldType, String delimiter) {
String result = null;
// System.out.println(expression);
// Evaluate XPath Expression with different returnType
String XPathExpression = expression;
QName returnType = null;
Object xPathResult = null;
Object xPathResultOutlinkAnchor = null;
//
if(expressionType.equals("xpath.string")){
returnType = XPathConstants.STRING;
xPathResult = parser.evaluateXPathExpression(XPathExpression, returnType);
}
else if(expressionType.equals("xpath.node")){
returnType = XPathConstants.NODE;
xPathResult = parser.evaluateXPathExpression(XPathExpression, returnType);
}
else if(expressionType.equals("xpath.nodeset")){
returnType = XPathConstants.NODESET;
xPathResult = parser.evaluateXPathExpression(XPathExpression, returnType);
}
else if(expressionType.equals("xpath.number")){
returnType = XPathConstants.NUMBER;
xPathResult = parser.evaluateXPathExpression(XPathExpression, returnType);
}
else if(expressionType.equals("xpath.boolean")){
returnType = XPathConstants.BOOLEAN;
xPathResult = parser.evaluateXPathExpression(XPathExpression, returnType);
}
else if(expressionType.equals("xpath.string.normalized")){
returnType = XPathConstants.STRING;
xPathResult = parser.evaluateXPathExpression("normalize-space(" + XPathExpression + ")", returnType);
}
else if(expressionType.equals("xpath.outlink.nodeset")){
returnType = XPathConstants.NODESET;
if (XPathExpression.endsWith("/@href")) {
XPathExpression = XPathExpression.substring(0, XPathExpression.length() - 6);
}
xPathResult = parser.evaluateXPathExpression(XPathExpression + "/@href", returnType);
xPathResultOutlinkAnchor = parser.evaluateXPathExpression(XPathExpression + "/text()", returnType);
}
else if(expressionType.equals("plain.string")){
xPathResult = expression;
} //
if (xPathResult != null) {
//
if (expressionType.equals(XPathTreeEntryNode.XPATH_OUTLINK_NODESET_ENTRY)) {
NodeList list = (NodeList) xPathResult;
ArrayList urlArrayList = new ArrayList();
ArrayList anchorArrayList = new ArrayList();
// fill data
if (xPathResultOutlinkAnchor != null) {
NodeList anchorList = (NodeList) xPathResultOutlinkAnchor;
for (int i = 0; i < list.getLength(); i++) {
String value = list.item(i).getNodeValue();
String anchorValue = " ";
if (i < anchorList.getLength()) {
anchorValue = anchorList.item(i).getNodeValue().isEmpty() ? anchorList.item(i).getNodeValue() : "";
}
if (!value.matches("\\s+")) {
value = list.item(i).getNodeValue().replaceAll("\\s+", " "); // normalize space
if (!anchorValue.matches("\\s+")) {
anchorValue = anchorList.item(i).getNodeValue().replaceAll("\\s+", " "); // normalize space
urlArrayList.add(value);
anchorArrayList.add(anchorValue);
} else {
urlArrayList.add(value);
anchorArrayList.add("");
}
}
}
} else {
for (int i = 0; i < list.getLength(); i++) {
String value = list.item(i).getNodeValue();
if (!value.matches("\\s+")) {
value = list.item(i).getNodeValue().replaceAll("\\s+", " "); // normalize space
urlArrayList.add(value);
anchorArrayList.add("");
}
}
}
StringBuilder esbe = new StringBuilder();
for (int k = 0; k < urlArrayList.size(); k++) {
esbe.append("").append((String) urlArrayList.get(k)).append(" | ").append((String) anchorArrayList.get(k)).append(" |
");
}
result = esbe.toString();
}//
//
else if (expressionType.equals(XPathTreeEntryNode.XPATH_NODESET_ENTRY)) {
NodeList list = (NodeList) xPathResult;
ArrayList urlArrayList = new ArrayList();
// fill data
for (int i = 0; i < list.getLength(); i++) {
String value = list.item(i).getNodeValue();
if (value != null && !value.matches("\\s+")) {
value = list.item(i).getNodeValue().replaceAll("\\s+", " "); // normalize space
urlArrayList.add(value);
}
}
StringBuilder esbe = new StringBuilder();
if(fieldType.equals(XPathTreeFieldNode.CONTINUOUS_TEXT)){
for (int k = 0; k < urlArrayList.size(); k++) {
esbe.append((String) urlArrayList.get(k)).append(delimiter);
}
} else if (fieldType.equals(XPathTreeFieldNode.SEGMENTED_TEXT)){
esbe.append("");
for (int k = 0; k < urlArrayList.size(); k++) {
esbe.append("").append((String) urlArrayList.get(k)).append(" |
");
}
esbe.append("
");
}
result = esbe.toString();
}//
else {
if (fieldType.equals(XPathTreeFieldNode.CONTINUOUS_TEXT)) {
result = xPathResult.toString() + delimiter;
} else if (fieldType.equals(XPathTreeFieldNode.SEGMENTED_TEXT)) {
result = xPathResult.toString();
}
// result = xPathResult.toString();
}
} else {
return "";
}
return result;
}
}