# This patch file was generated by NetBeans IDE
# Following Index: paths are relative to: /home/bart/NetBeansProjects/tika/tika-parsers/src
# This patch can be applied using context Tools: Patch action on respective folder.
# It uses platform neutral UTF-8 encoding and \n newlines.
# Above lines and this line are ignored by the patching process.
Index: main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
--- main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java Base (BASE)
+++ main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java Locally Modified (Based On LOCAL)
@@ -46,7 +46,7 @@
import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
/**
- * Parser for OpenDocument <code>content.xml</code> files.
+ * Parser for ODF <code>content.xml</code> files.
*/
public class OpenDocumentContentParser implements Parser {
@@ -59,12 +59,21 @@
public static final String OFFICE_NS =
"urn:oasis:names:tc:opendocument:xmlns:office:1.0";
+ public static final String SVG_NS =
+ "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
+
+ public static final String PRESENTATION_NS =
+ "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
+
+ public static final String DRAW_NS =
+ "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
+
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
protected static final char[] TAB = new char[] { '\t' };
/**
- * Mappings between OpenDocument tag names and XHTML tag names
+ * Mappings between ODF tag names and XHTML tag names
* (including attributes). All other tag names/attributes are ignored
* and left out from event stream.
*/
@@ -73,7 +82,9 @@
static {
// general mappings of text:-tags
- MAPPINGS.put(new QName(TEXT_NS, "p"), new TargetElement(XHTML, "p"));
+ MAPPINGS.put(
+ new QName(TEXT_NS, "p"),
+ new TargetElement(XHTML, "p"));
// text:h-tags are mapped specifically in startElement/endElement
MAPPINGS.put(
new QName(TEXT_NS, "line-break"),
@@ -91,12 +102,35 @@
new QName(OFFICE_NS, "annotation"),
new TargetElement(XHTML, "div"));
MAPPINGS.put(
+ new QName(PRESENTATION_NS, "notes"),
+ new TargetElement(XHTML, "div"));
+ MAPPINGS.put(
+ new QName(DRAW_NS, "object"),
+ new TargetElement(XHTML, "object"));
+ MAPPINGS.put(
+ new QName(DRAW_NS, "text-box"),
+ new TargetElement(XHTML, "div"));
+ MAPPINGS.put(
+ new QName(SVG_NS, "title"),
+ new TargetElement(XHTML, "span"));
+ MAPPINGS.put(
+ new QName(SVG_NS, "desc"),
+ new TargetElement(XHTML, "span"));
+ MAPPINGS.put(
new QName(TEXT_NS, "span"),
new TargetElement(XHTML, "span"));
+
+ final HashMap<QName, QName> aAttsMapping =
+ new HashMap<QName, QName>();
+ aAttsMapping.put(
+ new QName(XLINK_NS, "href"),
+ new QName("href"));
+ aAttsMapping.put(
+ new QName(XLINK_NS, "title"),
+ new QName("title"));
MAPPINGS.put(
new QName(TEXT_NS, "a"),
- new TargetElement(XHTML, "a", Collections.singletonMap(
- new QName(XLINK_NS, "href"), new QName("href"))));
+ new TargetElement(XHTML, "a", aAttsMapping));
// create HTML tables from table:-tags
MAPPINGS.put(
@@ -186,6 +220,20 @@
}
}
+ /**
+ * Check if a node is a text node: any element in TEXT_NS, or a "title" or "desc" element in SVG_NS.
+ */
+ private boolean isTextNode(String namespaceURI, String localName) {
+ if (TEXT_NS.equals(namespaceURI)) {
+ return true;
+ }
+ if (SVG_NS.equals(namespaceURI)) {
+ return "title".equals(localName) ||
+ "desc".equals(localName);
+ }
+ return false;
+ }
+
@Override
public void startElement(
String namespaceURI, String localName, String qName,
@@ -197,8 +245,8 @@
// the depth of the current node and also marks top of stack.
assert nodeDepth >= 0;
- textNodeStack.set(nodeDepth++, TEXT_NS.equals(namespaceURI));
-
+ textNodeStack.set(nodeDepth++,
+ isTextNode(namespaceURI, localName));
// filter *all* content of some tags
assert completelyFiltered >= 0;
Index: test/java/org/apache/tika/parser/opendocument/ODFParserTest.java
--- test/java/org/apache/tika/parser/opendocument/ODFParserTest.java Base (BASE)
+++ test/java/org/apache/tika/parser/opendocument/ODFParserTest.java Locally Modified (Based On LOCAL)
@@ -24,6 +24,8 @@
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
+import org.apache.tika.parser.odf.OpenDocumentParser;
+
public class ODFParserTest extends TestCase {
public void testXMLParser() throws Exception {
@@ -32,7 +34,7 @@
try {
Metadata metadata = new Metadata();
ContentHandler handler = new BodyContentHandler();
- new OpenOfficeParser().parse(input, handler, metadata);
+ new OpenDocumentParser().parse(input, handler, metadata);
assertEquals(
"application/vnd.oasis.opendocument.text",
@@ -42,6 +44,8 @@
assertTrue(content.contains("Tika is part of the Lucene project."));
assertTrue(content.contains("Solr"));
assertTrue(content.contains("one embedded"));
+ assertTrue(content.contains("Rectangle Title"));
+ assertTrue(content.contains("a blue background and dark border"));
\ No newline at end of file
} finally {
input.close();
}