### Eclipse Workspace Patch 1.0
#P tika-parsers
#
# The sample image contains the following metadata (exiv2 output):
#Iptc.Application2.RecordVersion              Short       1  4
#Iptc.Application2.ObjectName                 String     17  Tosteberga Ängar
#Iptc.Application2.Keywords                   String     10  grazelands
#Iptc.Application2.Keywords                   String     14  nature reserve
#Iptc.Application2.Keywords                   String     13  bird watching
#Iptc.Application2.Keywords                   String      5  coast
#Iptc.Application2.Byline                     String     12  Some Tourist
#Iptc.Application2.Caption                    String     53  Bird site in north eastern Skåne, Sweden.#(new line)
#
# The test image was tagged using ACDSee. Photoshop produces the same fields,
# but their character encoding was not understood by exiv2 or Metadata Extractor.
#
# Tested with international characters and a newline. The strings in the JUnit test are built
# from raw bytes rather than literals because the Tika project has no source encoding setting in Maven.
#
Index: src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
===================================================================
--- src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java	(revision 979538)
+++ src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java	(working copy)
@@ -73,4 +73,24 @@
         assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
         assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
     }
+    
+    /** Checks title, description, author and keywords read from testJPEG_commented.jpg. */
+    public void testJPEGTitleAndDescription() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+            getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+        // Non-ASCII text is decoded from explicit UTF-8 bytes, never the platform default charset.
+        // Bytes 0xC3 0x84 are U+00C4 (capital A with diaeresis).
+        assertEquals("Tosteberga " + new String(new byte[]{-61, -124}, "UTF-8") + "ngar", metadata.get(Metadata.TITLE));
+        // Bytes 0xC3 0xA5 are U+00E5 (small a with ring above); the caption also embeds a line feed.
+        assertEquals("Bird site in north eastern Sk" + new String(new byte[]{-61, -91}, "UTF-8") +
+                "ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
+        assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
+        // The parser returns all keywords as one space-separated string, so multi-word keywords
+        // are ambiguous; XMP would give "bird watching, nature reserve, coast, grazelands".
+        assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
+    }
 }
Index: src/main/java/org/apache/tika/parser/image/TiffExtractor.java
===================================================================
--- src/main/java/org/apache/tika/parser/image/TiffExtractor.java	(revision 979538)
+++ src/main/java/org/apache/tika/parser/image/TiffExtractor.java	(working copy)
@@ -152,6 +152,25 @@
 	    return;
 	}
 	
+	// File info: title, author and description.
+	// Metadata Extractor does not read XMP, so these values are taken from the IPTC directory here.
+	if("Iptc".equals(tag.getDirectoryName())) {
+		if ("Object Name".equals(tag.getTagName())) {
+			metadata.set(Metadata.TITLE, tag.getDescription());
+			return;
+		}
+		if ("By-line".equals(tag.getTagName())) {
+			metadata.set(Metadata.AUTHOR, tag.getDescription());
+			return;
+		}		
+		if ("Caption/Abstract".equals(tag.getTagName())) {
+			// Metadata Extractor appears to return IPTC newlines as a lone carriage return, whereas the
+			// exiv2 command does not; normalize CR and CRLF to a line feed, which is less surprising to users.
+			metadata.set(Metadata.DESCRIPTION, tag.getDescription().replaceAll("\r\n?", "\n"));
+			return;
+		}
+	}
+	
 	// EXIF / TIFF Tags
 	Property key = null;
 	if(tag.getTagName().equals("Image Width") ||
Index: src/test/resources/test-documents/testJPEG_commented.jpg
===================================================================
Cannot display: file marked as a binary type.
svn:mime-type = image/jpeg

Property changes on: src/test/resources/test-documents/testJPEG_commented.jpg
___________________________________________________________________
Added: svn:mime-type
   + image/jpeg

