Index: pom.xml
===================================================================
--- pom.xml (revision 544880)
+++ pom.xml (working copy)
@@ -28,6 +28,7 @@
org.apache.tikatika0.1-SNAPSHOT
+
Apache Tika
@@ -139,10 +140,39 @@
file://${basedir}/../site
-
+
+
+
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.1</version>
+ <type>jar</type>
+
+
+
+
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <type>jar</type>
+ <scope>test</scope>
+
+
+
+
-
-
+ <sourceDirectory>${basedir}/src/main/java/</sourceDirectory>
+ <testSourceDirectory>${basedir}/src/main/test/</testSourceDirectory>
+
+
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+
+
+ 1.5
+
+
+ maven-antrun-plugin
Index: src/main/java/org/apache/tika/metadata/CreativeCommons.java
===================================================================
--- src/main/java/org/apache/tika/metadata/CreativeCommons.java (revision 0)
+++ src/main/java/org/apache/tika/metadata/CreativeCommons.java (revision 0)
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * A collection of Creative Commons property names, for use as metadata names.
+ *
+ * @see <a href="https://creativecommons.org/">creativecommons.org</a>
+ *
+ * @author Chris Mattmann
+ * @author Jérôme Charron
+ */
+public interface CreativeCommons {
+
+ public final static String LICENSE_URL = "License-Url";
+
+ public final static String LICENSE_LOCATION = "License-Location";
+
+ public final static String WORK_TYPE = "Work-Type";
+
+}
Index: src/main/java/org/apache/tika/metadata/DublinCore.java
===================================================================
--- src/main/java/org/apache/tika/metadata/DublinCore.java (revision 0)
+++ src/main/java/org/apache/tika/metadata/DublinCore.java (revision 0)
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * A collection of Dublin Core metadata names.
+ *
+ * @see <a href="https://www.dublincore.org/">dublincore.org</a>
+ *
+ * @author Chris Mattmann
+ * @author Jérôme Charron
+ */
+public interface DublinCore {
+
+ /**
+ * Typically, Format may include the media-type or dimensions of the resource.
+ * Format may be used to determine the software, hardware or other equipment
+ * needed to display or operate the resource. Examples of dimensions include
+ * size and duration. Recommended best practice is to select a value from a
+ * controlled vocabulary (for example, the list of Internet Media Types [MIME]
+ * defining computer media formats).
+ */
+ public static final String FORMAT = "format";
+
+ /**
+ * Recommended best practice is to identify the resource by means of a string
+ * or number conforming to a formal identification system. Example formal
+ * identification systems include the Uniform Resource Identifier (URI)
+ * (including the Uniform Resource Locator (URL)), the Digital Object
+ * Identifier (DOI) and the International Standard Book Number (ISBN).
+ */
+ public static final String IDENTIFIER = "identifier";
+
+ /**
+ * Date on which the resource was changed.
+ */
+ public static final String MODIFIED = "modified";
+
+ /**
+ * An entity responsible for making contributions to the content of the
+ * resource. Examples of a Contributor include a person, an organisation, or a
+ * service. Typically, the name of a Contributor should be used to indicate
+ * the entity.
+ */
+ public static final String CONTRIBUTOR = "contributor";
+
+ /**
+ * The extent or scope of the content of the resource. Coverage will typically
+ * include spatial location (a place name or geographic coordinates), temporal
+ * period (a period label, date, or date range) or jurisdiction (such as a
+ * named administrative entity). Recommended best practice is to select a
+ * value from a controlled vocabulary (for example, the Thesaurus of
+ * Geographic Names [TGN]) and that, where appropriate, named places or time
+ * periods be used in preference to numeric identifiers such as sets of
+ * coordinates or date ranges.
+ */
+ public static final String COVERAGE = "coverage";
+
+ /**
+ * An entity primarily responsible for making the content of the resource.
+ * Examples of a Creator include a person, an organisation, or a service.
+ * Typically, the name of a Creator should be used to indicate the entity.
+ */
+ public static final String CREATOR = "creator";
+
+ /**
+ * A date associated with an event in the life cycle of the resource.
+ * Typically, Date will be associated with the creation or availability of the
+ * resource. Recommended best practice for encoding the date value is defined
+ * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format.
+ */
+ public static final String DATE = "date";
+
+ /**
+ * An account of the content of the resource. Description may include but is
+ * not limited to: an abstract, table of contents, reference to a graphical
+ * representation of content or a free-text account of the content.
+ */
+ public static final String DESCRIPTION = "description";
+
+ /**
+ * A language of the intellectual content of the resource. Recommended best
+ * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
+ * [ISO639], defines two- and three-letter primary language tags with optional
+ * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian,
+ * and "en-GB" for English used in the United Kingdom.
+ */
+ public static final String LANGUAGE = "language";
+
+ /**
+ * An entity responsible for making the resource available. Examples of a
+ * Publisher include a person, an organisation, or a service. Typically, the
+ * name of a Publisher should be used to indicate the entity.
+ */
+ public static final String PUBLISHER = "publisher";
+
+ /**
+ * A reference to a related resource. Recommended best practice is to
+ * reference the resource by means of a string or number conforming to a
+ * formal identification system.
+ */
+ public static final String RELATION = "relation";
+
+ /**
+ * Information about rights held in and over the resource. Typically, a Rights
+ * element will contain a rights management statement for the resource, or
+ * reference a service providing such information. Rights information often
+ * encompasses Intellectual Property Rights (IPR), Copyright, and various
+ * Property Rights. If the Rights element is absent, no assumptions can be
+ * made about the status of these and other rights with respect to the
+ * resource.
+ */
+ public static final String RIGHTS = "rights";
+
+ /**
+ * A reference to a resource from which the present resource is derived. The
+ * present resource may be derived from the Source resource in whole or in
+ * part. Recommended best practice is to reference the resource by means of a
+ * string or number conforming to a formal identification system.
+ */
+ public static final String SOURCE = "source";
+
+ /**
+ * The topic of the content of the resource. Typically, a Subject will be
+ * expressed as keywords, key phrases or classification codes that describe a
+ * topic of the resource. Recommended best practice is to select a value from
+ * a controlled vocabulary or formal classification scheme.
+ */
+ public static final String SUBJECT = "subject";
+
+ /**
+ * A name given to the resource. Typically, a Title will be a name by which
+ * the resource is formally known.
+ */
+ public static final String TITLE = "title";
+
+ /**
+ * The nature or genre of the content of the resource. Type includes terms
+ * describing general categories, functions, genres, or aggregation levels for
+ * content. Recommended best practice is to select a value from a controlled
+ * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe
+ * the physical or digital manifestation of the resource, use the Format
+ * element.
+ */
+ public static final String TYPE = "type";
+
+}
Index: src/main/java/org/apache/tika/metadata/HttpHeaders.java
===================================================================
--- src/main/java/org/apache/tika/metadata/HttpHeaders.java (revision 0)
+++ src/main/java/org/apache/tika/metadata/HttpHeaders.java (revision 0)
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * A collection of HTTP header names.
+ *
+ * @see <a href="https://www.rfc-editor.org/rfc/rfc2616">Hypertext Transfer Protocol --
+ * HTTP/1.1 (RFC 2616)</a>
+ *
+ * @author Chris Mattmann
+ * @author Jérôme Charron
+ */
+public interface HttpHeaders {
+
+ public final static String CONTENT_ENCODING = "Content-Encoding";
+
+ public final static String CONTENT_LANGUAGE = "Content-Language";
+
+ public final static String CONTENT_LENGTH = "Content-Length";
+
+ public final static String CONTENT_LOCATION = "Content-Location";
+
+ public static final String CONTENT_DISPOSITION = "Content-Disposition";
+
+ public final static String CONTENT_MD5 = "Content-MD5";
+
+ public final static String CONTENT_TYPE = "Content-Type";
+
+ public final static String LAST_MODIFIED = "Last-Modified";
+
+ public final static String LOCATION = "Location";
+
+}
Index: src/main/java/org/apache/tika/metadata/Metadata.java
===================================================================
--- src/main/java/org/apache/tika/metadata/Metadata.java (revision 0)
+++ src/main/java/org/apache/tika/metadata/Metadata.java (revision 0)
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+// JDK imports
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * A multi-valued metadata container.
+ *
+ * @author Chris Mattmann
+ * @author Jérôme Charron
+ *
+ */
+public class Metadata implements CreativeCommons, DublinCore, HttpHeaders,
+ Office {
+
+ /**
+ * A map of all metadata attributes.
+ */
+ private Map metadata = null;
+
+ /**
+ * Constructs a new, empty metadata.
+ */
+ public Metadata() {
+ metadata = new HashMap();
+ }
+
+ /**
+ * Returns true if named value is multivalued.
+ *
+ * @param name
+ * name of metadata
+ * @return true is named value is multivalued, false if single value or null
+ */
+ public boolean isMultiValued(final String name) {
+ return metadata.get(name) != null && metadata.get(name).length > 1;
+ }
+
+ /**
+ * Returns an array of the names contained in the metadata.
+ *
+ * @return Metadata names
+ */
+ public String[] names() {
+ return metadata.keySet().toArray(new String[metadata.keySet().size()]);
+ }
+
+ /**
+ * Get the value associated to a metadata name. If many values are assiociated
+ * to the specified name, then the first one is returned.
+ *
+ * @param name
+ * of the metadata.
+ * @return the value associated to the specified metadata name.
+ */
+ public String get(final String name) {
+ String[] values = metadata.get(name);
+ if (values == null) {
+ return null;
+ } else {
+ return values[0];
+ }
+ }
+
+ /**
+ * Get the values associated to a metadata name.
+ *
+ * @param name
+ * of the metadata.
+ * @return the values associated to a metadata name.
+ */
+ public String[] getValues(final String name) {
+ return _getValues(name);
+ }
+
+ private String[] _getValues(final String name) {
+ String[] values = metadata.get(name);
+ if (values == null) {
+ values = new String[0];
+ }
+ return values;
+ }
+
+ /**
+ * Add a metadata name/value mapping. Add the specified value to the list of
+ * values associated to the specified metadata name.
+ *
+ * @param name
+ * the metadata name.
+ * @param value
+ * the metadata value.
+ */
+ public void add(final String name, final String value) {
+ String[] values = metadata.get(name);
+ if (values == null) {
+ set(name, value);
+ } else {
+ String[] newValues = new String[values.length + 1];
+ System.arraycopy(values, 0, newValues, 0, values.length);
+ newValues[newValues.length - 1] = value;
+ metadata.put(name, newValues);
+ }
+ }
+
+ /**
+ * Copy All key-value pairs from properties.
+ *
+ * @param properties
+ * properties to copy from
+ */
+ public void setAll(Properties properties) {
+ Enumeration names = properties.propertyNames();
+ while (names.hasMoreElements()) {
+ String name = (String) names.nextElement();
+ metadata.put(name, new String[] { properties.getProperty(name) });
+ }
+ }
+
+ /**
+ * Set metadata name/value. Associate the specified value to the specified
+ * metadata name. If some previous values were associated to this name, they
+ * are removed.
+ *
+ * @param name
+ * the metadata name.
+ * @param value
+ * the metadata value.
+ */
+ public void set(String name, String value) {
+ metadata.put(name, new String[] { value });
+ }
+
+ /**
+ * Remove a metadata and all its associated values.
+ *
+ * @param name
+ * metadata name to remove
+ */
+ public void remove(String name) {
+ metadata.remove(name);
+ }
+
+ /**
+ * Returns the number of metadata names in this metadata.
+ *
+ * @return number of metadata names
+ */
+ public int size() {
+ return metadata.size();
+ }
+
+ public boolean equals(Object o) {
+
+ if (o == null) {
+ return false;
+ }
+
+ Metadata other = null;
+ try {
+ other = (Metadata) o;
+ } catch (ClassCastException cce) {
+ return false;
+ }
+
+ if (other.size() != size()) {
+ return false;
+ }
+
+ String[] names = names();
+ for (int i = 0; i < names.length; i++) {
+ String[] otherValues = other._getValues(names[i]);
+ String[] thisValues = _getValues(names[i]);
+ if (otherValues.length != thisValues.length) {
+ return false;
+ }
+ for (int j = 0; j < otherValues.length; j++) {
+ if (!otherValues[j].equals(thisValues[j])) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ public String toString() {
+ StringBuffer buf = new StringBuffer();
+ String[] names = names();
+ for (int i = 0; i < names.length; i++) {
+ String[] values = _getValues(names[i]);
+ for (int j = 0; j < values.length; j++) {
+ buf.append(names[i]).append("=").append(values[j]).append(" ");
+ }
+ }
+ return buf.toString();
+ }
+
+}
Index: src/main/java/org/apache/tika/metadata/Office.java
===================================================================
--- src/main/java/org/apache/tika/metadata/Office.java (revision 0)
+++ src/main/java/org/apache/tika/metadata/Office.java (revision 0)
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * A collection of "Office" document property names.
+ * NOTE(review): CHARACTER_COUNT is the only name spelled with a space rather than a hyphen; confirm this is intended.
+ * @author Chris Mattmann
+ * @author Jérôme Charron
+ */
+public interface Office {
+
+ public static final String KEYWORDS = "Keywords";
+
+ public static final String COMMENTS = "Comments";
+
+ public static final String LAST_AUTHOR = "Last-Author";
+
+ public static final String APPLICATION_NAME = "Application-Name";
+
+ public static final String CHARACTER_COUNT = "Character Count";
+
+ public static final String LAST_PRINTED = "Last-Printed";
+
+ public static final String LAST_SAVED = "Last-Save-Date";
+
+ public static final String PAGE_COUNT = "Page-Count";
+
+ public static final String REVISION_NUMBER = "Revision-Number";
+
+ public static final String WORD_COUNT = "Word-Count";
+
+ public static final String TEMPLATE = "Template";
+
+ public static final String AUTHOR = "Author";
+
+}
Index: src/main/java/org/apache/tika/metadata/SpellCheckedMetadata.java
===================================================================
--- src/main/java/org/apache/tika/metadata/SpellCheckedMetadata.java (revision 0)
+++ src/main/java/org/apache/tika/metadata/SpellCheckedMetadata.java (revision 0)
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+//JDK imports
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.HashMap;
+import java.util.Map;
+
+//Apache imports
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * A decorator to Metadata that adds spellchecking capabilities to property
+ * names. Currently used spelling vocabulary contains just the HTTP headers from
+ * {@link HttpHeaders} class.
+ */
+public class SpellCheckedMetadata extends Metadata {
+
+  /**
+   * Threshold divider.
+   *
+   * <code>threshold = searched.length() / THRESHOLD_DIVIDER;</code>
+   */
+  private static final int THRESHOLD_DIVIDER = 3;
+
+  /** Normalized name to name mapping. */
+  private static final Map<String, String> NAMES_IDX = new HashMap<String, String>();
+
+  /** Array holding map keys. */
+  private static final String[] normalized;
+
+  static {
+    // Uses following array to fill the metanames index and the metanames list.
+    Class<?>[] spellthese = {HttpHeaders.class};
+    for (Class<?> spellCheckedNames : spellthese) {
+      for (Field field : spellCheckedNames.getFields()) {
+        int mods = field.getModifiers();
+        if (Modifier.isFinal(mods) && Modifier.isPublic(mods)
+            && Modifier.isStatic(mods) && field.getType().equals(String.class)) {
+          try {
+            String val = (String) field.get(null);
+            NAMES_IDX.put(normalize(val), val);
+          } catch (Exception ignored) {
+            // Best effort: a constant that cannot be read is simply left out.
+          }
+        }
+      }
+    }
+    normalized = NAMES_IDX.keySet().toArray(new String[NAMES_IDX.size()]);
+  }
+
+  /**
+   * Normalizes String: keeps only the letters, converted to lower case.
+   *
+   * @param str
+   *          the string to normalize
+   * @return normalized String
+   */
+  private static String normalize(final String str) {
+    StringBuilder buf = new StringBuilder(str.length());
+    for (int i = 0; i < str.length(); i++) {
+      char c = str.charAt(i);
+      if (Character.isLetter(c)) {
+        buf.append(Character.toLowerCase(c));
+      }
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Get the normalized name of metadata attribute name. This method tries to
+   * find a well-known metadata name (one of the names defined in
+   * {@link HttpHeaders}) that matches the specified name. The matching is error
+   * tolerant. For instance,
+   * <ul>
+   * <li><code>content-type</code> gives <code>Content-Type</code></li>
+   * <li><code>CoNtEntType</code> gives <code>Content-Type</code></li>
+   * <li><code>ConTnTtYpe</code> gives <code>Content-Type</code></li>
+   * </ul>
+   * If no matching with a well-known metadata name is found, then the original
+   * name is returned.
+   *
+   * @param name name to normalize; <code>null</code> is returned unchanged
+   * @return normalized name
+   */
+  public static String getNormalizedName(final String name) {
+    if (name == null) {
+      return null;
+    }
+    String searched = normalize(name);
+    String value = NAMES_IDX.get(searched);
+    if (value == null) {
+      // Keep the closest candidate rather than the first one under the threshold.
+      int best = searched.length() / THRESHOLD_DIVIDER;
+      for (String candidate : normalized) {
+        int distance = StringUtils.getLevenshteinDistance(searched, candidate);
+        if (distance < best) {
+          best = distance;
+          value = NAMES_IDX.get(candidate);
+        }
+      }
+    }
+    return (value != null) ? value : name;
+  }
+
+  @Override
+  public boolean isMultiValued(final String name) {
+    return super.isMultiValued(getNormalizedName(name));
+  }
+
+  @Override
+  public void remove(final String name) {
+    super.remove(getNormalizedName(name));
+  }
+
+  @Override
+  public void add(final String name, final String value) {
+    super.add(getNormalizedName(name), value);
+  }
+
+  @Override
+  public String[] getValues(final String name) {
+    return super.getValues(getNormalizedName(name));
+  }
+
+  @Override
+  public String get(final String name) {
+    return super.get(getNormalizedName(name));
+  }
+
+  @Override
+  public void set(final String name, final String value) {
+    super.set(getNormalizedName(name), value);
+  }
+
+}
Index: src/main/java/org/apache/tika/metadata/package.html
===================================================================
--- src/main/java/org/apache/tika/metadata/package.html (revision 0)
+++ src/main/java/org/apache/tika/metadata/package.html (revision 0)
@@ -0,0 +1,6 @@
+<html>
+<body>
+A Multi-valued Metadata container, and set
+of constant fields for Tika Metadata.
+</body>
+</html>
Index: src/main/test/org/apache/tika/metadata/TestMetadata.java
===================================================================
--- src/main/test/org/apache/tika/metadata/TestMetadata.java (revision 0)
+++ src/main/test/org/apache/tika/metadata/TestMetadata.java (revision 0)
@@ -0,0 +1,211 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+// JDK imports
+import java.util.Properties;
+
+// Junit imports
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+/**
+ * JUnit based tests of class {@link org.apache.tika.metadata.Metadata}.
+ */
+public class TestMetadata extends TestCase {
+
+ private static final String CONTENTTYPE = "contenttype";
+
+ public TestMetadata(String testName) {
+ super(testName);
+ }
+
+ public static Test suite() {
+ return new TestSuite(TestMetadata.class);
+ }
+
+ public static void main(String[] args) {
+ TestRunner.run(suite());
+ }
+
+ /** Checks that add(String, String) appends values in call order and keeps duplicates. */
+ public void testAdd() {
+ String[] values = null;
+ Metadata meta = new Metadata();
+
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(0, values.length);
+
+ meta.add(CONTENTTYPE, "value1");
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(1, values.length);
+ assertEquals("value1", values[0]);
+
+ meta.add(CONTENTTYPE, "value2");
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(2, values.length);
+ assertEquals("value1", values[0]);
+ assertEquals("value2", values[1]);
+
+ // NOTE : For now, the same value can be added many times.
+ // Should it be changed?
+ meta.add(CONTENTTYPE, "value1");
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(3, values.length);
+ assertEquals("value1", values[0]);
+ assertEquals("value2", values[1]);
+ assertEquals("value1", values[2]);
+ }
+
+ /** Checks that set(String, String) replaces earlier values and that a later add() appends. */
+ public void testSet() {
+ String[] values = null;
+ Metadata meta = new Metadata();
+
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(0, values.length);
+
+ meta.set(CONTENTTYPE, "value1");
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(1, values.length);
+ assertEquals("value1", values[0]);
+
+ meta.set(CONTENTTYPE, "value2");
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(1, values.length);
+ assertEquals("value2", values[0]);
+
+ meta.set(CONTENTTYPE, "new value 1");
+ meta.add("contenttype", "new value 2");
+ values = meta.getValues(CONTENTTYPE);
+ assertEquals(2, values.length);
+ assertEquals("new value 1", values[0]);
+ assertEquals("new value 2", values[1]);
+ }
+
+ /** Checks that setAll(Properties) copies every property as a single-valued entry. */
+ public void testSetProperties() {
+ String[] values = null;
+ Metadata meta = new Metadata();
+ Properties props = new Properties();
+
+ meta.setAll(props);
+ assertEquals(0, meta.size());
+
+ props.setProperty("name-one", "value1.1");
+ meta.setAll(props);
+ assertEquals(1, meta.size());
+ values = meta.getValues("name-one");
+ assertEquals(1, values.length);
+ assertEquals("value1.1", values[0]);
+
+ props.setProperty("name-two", "value2.1");
+ meta.setAll(props);
+ assertEquals(2, meta.size());
+ values = meta.getValues("name-one");
+ assertEquals(1, values.length);
+ assertEquals("value1.1", values[0]);
+ values = meta.getValues("name-two");
+ assertEquals(1, values.length);
+ assertEquals("value2.1", values[0]);
+ }
+
+ /** Checks that get(String) returns null for an unknown name and otherwise the first value. */
+ public void testGet() {
+ Metadata meta = new Metadata();
+ assertNull(meta.get("a-name"));
+ meta.add("a-name", "value-1");
+ assertEquals("value-1", meta.get("a-name"));
+ meta.add("a-name", "value-2");
+ assertEquals("value-1", meta.get("a-name"));
+ }
+
+ /** Checks that isMultiValued() is true only once a name holds more than one value. */
+ public void testIsMultiValued() {
+ Metadata meta = new Metadata();
+ assertFalse(meta.isMultiValued("key"));
+ meta.add("key", "value1");
+ assertFalse(meta.isMultiValued("key"));
+ meta.add("key", "value2");
+ assertTrue(meta.isMultiValued("key"));
+ }
+
+ /** Checks that names() reports one entry per distinct name that was added. */
+ public void testNames() {
+ String[] names = null;
+ Metadata meta = new Metadata();
+ names = meta.names();
+ assertEquals(0, names.length);
+
+ meta.add("name-one", "value");
+ names = meta.names();
+ assertEquals(1, names.length);
+ assertEquals("name-one", names[0]);
+ meta.add("name-two", "value");
+ names = meta.names();
+ assertEquals(2, names.length);
+ }
+
+ /** Checks that remove(String) drops a name together with all of its values. */
+ public void testRemove() {
+ Metadata meta = new Metadata();
+ meta.remove("name-one");
+ assertEquals(0, meta.size());
+ meta.add("name-one", "value-1.1");
+ meta.add("name-one", "value-1.2");
+ meta.add("name-two", "value-2.2");
+ assertEquals(2, meta.size());
+ assertNotNull(meta.get("name-one"));
+ assertNotNull(meta.get("name-two"));
+ meta.remove("name-one");
+ assertEquals(1, meta.size());
+ assertNull(meta.get("name-one"));
+ assertNotNull(meta.get("name-two"));
+ meta.remove("name-two");
+ assertEquals(0, meta.size());
+ assertNull(meta.get("name-one"));
+ assertNull(meta.get("name-two"));
+ }
+
+ /** Checks equals(Object) against null, a foreign type, and equal and unequal contents. */
+ public void testObject() {
+ Metadata meta1 = new Metadata();
+ Metadata meta2 = new Metadata();
+ assertFalse(meta1.equals(null));
+ assertFalse(meta1.equals("String"));
+ assertTrue(meta1.equals(meta2));
+ meta1.add("name-one", "value-1.1");
+ assertFalse(meta1.equals(meta2));
+ meta2.add("name-one", "value-1.1");
+ assertTrue(meta1.equals(meta2));
+ meta1.add("name-one", "value-1.2");
+ assertFalse(meta1.equals(meta2));
+ meta2.add("name-one", "value-1.2");
+ assertTrue(meta1.equals(meta2));
+ meta1.add("name-two", "value-2.1");
+ assertFalse(meta1.equals(meta2));
+ meta2.add("name-two", "value-2.1");
+ assertTrue(meta1.equals(meta2));
+ meta1.add("name-two", "value-2.2");
+ assertFalse(meta1.equals(meta2));
+ meta2.add("name-two", "value-2.x");
+ assertFalse(meta1.equals(meta2));
+ }
+
+}
Index: src/main/test/org/apache/tika/metadata/TestSpellCheckedMetadata.java
===================================================================
--- src/main/test/org/apache/tika/metadata/TestSpellCheckedMetadata.java (revision 0)
+++ src/main/test/org/apache/tika/metadata/TestSpellCheckedMetadata.java (revision 0)
@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+// JDK imports
+import java.util.Properties;
+
+// Junit imports
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+/**
+ * JUnit 3 test case for {@link org.apache.tika.metadata.SpellCheckedMetadata},
+ * covering name normalization and the multi-valued metadata operations.
+ *
+ * @author Chris Mattmann
+ * @author Jérôme Charron
+ */
+public class TestSpellCheckedMetadata extends TestCase {
+
+  /** Number of metadata objects built by {@link #testHandlingSpeed()}. */
+  private static final int NUM_ITERATIONS = 10000;
+
+  /** Creates the test case that runs the test method named testName. */
+  public TestSpellCheckedMetadata(String testName) {
+    super(testName);
+  }
+
+  /** Builds a suite from the test methods of this class. */
+  public static Test suite() {
+    return new TestSuite(TestSpellCheckedMetadata.class);
+  }
+
+  /** Runs the suite with the JUnit text runner. */
+  public static void main(String[] args) {
+    TestRunner.run(suite());
+  }
+
+  /** Test for the getNormalizedName(String) method. */
+  public void testGetNormalizedName() {
+    // Exact, re-cased and misspelled spellings that must all normalize alike.
+    String[] variants = {"Content-Type", "ContentType", "Content-type",
+        "contenttype", "contentype", "contntype"};
+    for (int i = 0; i < variants.length; i++) {
+      assertEquals(variants[i], "Content-Type",
+          SpellCheckedMetadata.getNormalizedName(variants[i]));
+    }
+  }
+
+  /** Test for the add(String, String) method. */
+  public void testAdd() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+
+    // Nothing is stored yet, even under a misspelled name.
+    String[] values = meta.getValues("contentype");
+    assertEquals(0, values.length);
+
+    meta.add("contentype", "value1");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+
+    // The correctly spelled name appends to the same entry.
+    meta.add("Content-Type", "value2");
+    values = meta.getValues("contentype");
+    assertEquals(2, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+
+    // NOTE : For now, the same value can be added many times.
+    // Should it be changed?
+    meta.add("ContentType", "value1");
+    values = meta.getValues("Content-Type");
+    assertEquals(3, values.length);
+    assertEquals("value1", values[0]);
+    assertEquals("value2", values[1]);
+    assertEquals("value1", values[2]);
+  }
+
+  /** Test for the set(String, String) method. */
+  public void testSet() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+
+    String[] values = meta.getValues("contentype");
+    assertEquals(0, values.length);
+
+    meta.set("contentype", "value1");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value1", values[0]);
+
+    // set() replaces the existing value instead of appending to it.
+    meta.set("Content-Type", "value2");
+    values = meta.getValues("contentype");
+    assertEquals(1, values.length);
+    assertEquals("value2", values[0]);
+
+    // add() after set() appends to the freshly set value.
+    meta.set("contenttype", "new value 1");
+    meta.add("contenttype", "new value 2");
+    values = meta.getValues("contentype");
+    assertEquals(2, values.length);
+    assertEquals("new value 1", values[0]);
+    assertEquals("new value 2", values[1]);
+  }
+
+  /** Test for setAll(Properties) method. */
+  public void testSetProperties() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+    Properties props = new Properties();
+
+    meta.setAll(props);
+    assertEquals(0, meta.size());
+
+    props.setProperty("name-one", "value1.1");
+    meta.setAll(props);
+    assertEquals(1, meta.size());
+    String[] values = meta.getValues("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value1.1", values[0]);
+
+    props.setProperty("name-two", "value2.1");
+    meta.setAll(props);
+    assertEquals(2, meta.size());
+    values = meta.getValues("name-one");
+    assertEquals(1, values.length);
+    assertEquals("value1.1", values[0]);
+    values = meta.getValues("name-two");
+    assertEquals(1, values.length);
+    assertEquals("value2.1", values[0]);
+  }
+
+  /** Test for get(String) method. */
+  public void testGet() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+    assertNull(meta.get("a-name"));
+
+    meta.add("a-name", "value-1");
+    assertEquals("value-1", meta.get("a-name"));
+    // get() keeps returning the first value once a second one is added.
+    meta.add("a-name", "value-2");
+    assertEquals("value-1", meta.get("a-name"));
+  }
+
+  /** Test for isMultiValued(String) method. */
+  public void testIsMultiValued() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+    assertFalse(meta.isMultiValued("key"));
+    meta.add("key", "value1");
+    assertFalse(meta.isMultiValued("key"));
+    meta.add("key", "value2");
+    assertTrue(meta.isMultiValued("key"));
+  }
+
+  /** Test for names() method. */
+  public void testNames() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+    String[] names = meta.names();
+    assertEquals(0, names.length);
+
+    meta.add("name-one", "value");
+    names = meta.names();
+    assertEquals(1, names.length);
+    assertEquals("name-one", names[0]);
+    meta.add("name-two", "value");
+    names = meta.names();
+    assertEquals(2, names.length);
+  }
+
+  /** Test for remove(String) method. */
+  public void testRemove() {
+    SpellCheckedMetadata meta = new SpellCheckedMetadata();
+    meta.remove("name-one");
+    assertEquals(0, meta.size());
+    meta.add("name-one", "value-1.1");
+    meta.add("name-one", "value-1.2");
+    meta.add("name-two", "value-2.2");
+    assertEquals(2, meta.size());
+    assertNotNull(meta.get("name-one"));
+    assertNotNull(meta.get("name-two"));
+    meta.remove("name-one");
+    assertEquals(1, meta.size());
+    assertNull(meta.get("name-one"));
+    assertNotNull(meta.get("name-two"));
+    meta.remove("name-two");
+    assertEquals(0, meta.size());
+    assertNull(meta.get("name-one"));
+    assertNull(meta.get("name-two"));
+  }
+
+  /** Test for equals(Object) method. */
+  public void testObject() {
+    SpellCheckedMetadata meta1 = new SpellCheckedMetadata();
+    SpellCheckedMetadata meta2 = new SpellCheckedMetadata();
+    assertFalse(meta1.equals(null));
+    assertFalse(meta1.equals("String"));
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-one", "value-1.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-one", "value-1.2");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.1");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.1");
+    assertTrue(meta1.equals(meta2));
+    meta1.add("name-two", "value-2.2");
+    assertFalse(meta1.equals(meta2));
+    meta2.add("name-two", "value-2.x");
+    assertFalse(meta1.equals(meta2));
+  }
+
+  /**
+   * Timing probe with no assertions: builds NUM_ITERATIONS metadata objects
+   * and prints the elapsed time, so that the relative performance impact of
+   * a change to the metadata classes can be measured.
+   */
+  public final void testHandlingSpeed() {
+    // nanoTime() is meant for elapsed time; wall-clock changes do not skew it.
+    long start = System.nanoTime();
+    for (int i = 0; i < NUM_ITERATIONS; i++) {
+      constructSpellCheckedMetadata();
+    }
+    long elapsedMs = (System.nanoTime() - start) / 1000000L;
+    System.out.println(NUM_ITERATIONS + " spellchecked metadata I/O time:"
+        + elapsedMs + "ms.");
+  }
+
+  /** Assembles a SpellCheckedMetadata holding eight sample HTTP headers. */
+  public static final SpellCheckedMetadata constructSpellCheckedMetadata() {
+    SpellCheckedMetadata scmd = new SpellCheckedMetadata();
+    scmd.add("Content-type", "foo/bar");
+    scmd.add("Connection", "close");
+    scmd.add("Last-Modified", "Sat, 09 Dec 2006 15:09:57 GMT");
+    scmd.add("Server", "Foobar");
+    scmd.add("Date", "Sat, 09 Dec 2006 18:07:20 GMT");
+    scmd.add("Accept-Ranges", "bytes");
+    scmd.add("ETag", "\"1234567-89-01234567\"");
+    scmd.add("Content-Length", "123");
+    return scmd;
+  }
+
+}