Index: CHANGES.txt
===================================================================
--- CHANGES.txt	(revision 1658019)
+++ CHANGES.txt	(working copy)
@@ -1,5 +1,8 @@
 Release 1.8 - Current Development
 
+  * A basic wrapper around the UNIX strings and file commands
+    was added to extract strings from binary files. (TIKA-1541)
+
   * Add test files and detection mechanism for Gridded
     Binary (GRIB) files. (TIKA-1539)
 
Index: tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java	(revision 0)
+++ tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsConfig.java	(working copy)
@@ -0,0 +1,146 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.strings;
+
+import java.io.File;
+import java.io.Serializable;
+
+/**
+ * Configuration for the "strings" (or strings-alternative) command: the
+ * command name, its optional installation folder and the execution timeout.
+ */
+public class StringsConfig implements Serializable {
+	/**
+	 * Serial version UID
+	 */
+	private static final long serialVersionUID = -1465227101645003594L;
+
+	/** Name of the executable; defaults to "strings". */
+	private String stringsCmd = "strings";
+
+	/**
+	 * Installation folder, either empty or ending with the file separator so
+	 * that it can be prepended directly to the command name.
+	 */
+	private String stringsPath = "";
+
+	/** Maximum time, in seconds, to wait for the command to terminate. */
+	private int timeout = 120;
+
+	/**
+	 * Default constructor: "strings" command, empty installation folder and a
+	 * timeout of 120 seconds.
+	 */
+	public StringsConfig() {
+		// defaults are assigned by the field initializers
+	}
+
+	/**
+	 * Returns the "strings" installation folder.
+	 *
+	 * @return the "strings" installation folder; either empty or terminated by
+	 *         the file separator.
+	 */
+	public String getStringsPath() {
+		return this.stringsPath;
+	}
+
+	/**
+	 * Returns the "strings" command name.
+	 *
+	 * @return the "strings" command name.
+	 */
+	public String getStringsCommand() {
+		return this.stringsCmd;
+	}
+
+	/**
+	 * Returns the command-line options for the "strings" command.
+	 *
+	 * @return always {@code null}: command-line options are not supported
+	 *         yet.
+	 */
+	public String getOptions() {
+		// TODO command-line options are not implemented yet
+		return null;
+	}
+
+	/**
+	 * Returns the maximum time (in seconds) to wait for the "strings" command
+	 * to terminate.
+	 *
+	 * @return the maximum time (in seconds) to wait for the "strings" command
+	 *         to terminate.
+	 */
+	public int getTimeout() {
+		return this.timeout;
+	}
+
+	/**
+	 * Sets the "strings" installation folder. A trailing file separator is
+	 * appended when missing.
+	 *
+	 * @param path
+	 *            the "strings" installation folder; {@code null} or an empty
+	 *            string clears the folder.
+	 */
+	public void setStringsPath(String path) {
+		if (path == null || path.length() == 0) {
+			// Nothing to normalize, and charAt(length - 1) would fail.
+			this.stringsPath = "";
+			return;
+		}
+
+		char lastChar = path.charAt(path.length() - 1);
+
+		if (lastChar != File.separatorChar) {
+			path += File.separatorChar;
+		}
+		this.stringsPath = path;
+	}
+
+	/**
+	 * Sets the "strings" (or strings-alternative) command name. It allows the
+	 * use of a strings-alternative command, if available.
+	 *
+	 * @param command
+	 *            the "strings" (or strings-alternative) command name.
+	 */
+	public void setStringsCommand(String command) {
+		this.stringsCmd = command;
+	}
+
+	/**
+	 * Sets command-line options for the "strings" command.
+	 *
+	 * @param options
+	 *            the command-line options for the "strings" command; currently
+	 *            ignored because options are not supported yet.
+	 */
+	public void setOptions(String options) {
+		// TODO command-line options are not implemented yet
+	}
+
+	/**
+	 * Sets the maximum time (in seconds) to wait for the "strings" command to
+	 * terminate.
+	 *
+	 * @param timeout
+	 *            the maximum time (in seconds) to wait for the "strings"
+	 *            command to terminate.
+	 */
+	public void setTimeout(int timeout) {
+		this.timeout = timeout;
+	}
+}
Index: tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsParser.java
===================================================================
--- tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsParser.java	(revision 0)
+++ tika-parsers/src/main/java/org/apache/tika/parser/strings/StringsParser.java	(working copy)
@@ -0,0 +1,298 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.strings;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.external.ExternalParser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser that uses the "strings" (or strings-alternative) command to find the
+ * printable strings in an object, or other binary, file
+ * (application/octet-stream). Useful as "best-effort" parser for files detected
+ * as application/octet-stream.
+ */
+public class StringsParser extends AbstractParser {
+	/**
+	 * Serial version UID
+	 */
+	private static final long serialVersionUID = 802566634661575025L;
+
+	private static final Set<MediaType> SUPPORTED_TYPES = Collections
+			.singleton(MediaType.OCTET_STREAM);
+
+	private static final StringsConfig DEFAULT_CONFIG = new StringsConfig();
+
+	/** Charset used to decode the output of the external commands. */
+	private static final String OUTPUT_CHARSET = "UTF-8";
+
+	@Override
+	public Set<MediaType> getSupportedTypes(ParseContext context) {
+		return SUPPORTED_TYPES;
+	}
+
+	/**
+	 * Runs the "strings" command on the document and writes its output as the
+	 * XHTML body. Nothing is emitted when the command is not available.
+	 * <p>
+	 * The {@link StringsConfig} is taken from the parse context, falling back
+	 * to the default configuration.
+	 */
+	@Override
+	public void parse(InputStream stream, ContentHandler handler,
+			Metadata metadata, ParseContext context) throws IOException,
+			SAXException, TikaException {
+		StringsConfig config = context.get(StringsConfig.class, DEFAULT_CONFIG);
+
+		if (!hasStrings(config)) {
+			return;
+		}
+
+		// Any temporary file created to spool the stream to disk is tracked
+		// here and removed once parsing is over.
+		TemporaryResources tmp = new TemporaryResources();
+		try {
+			TikaInputStream tis = TikaInputStream.get(stream, tmp);
+			File input = tis.getFile();
+
+			// Metadata
+			metadata.set(Metadata.CONTENT_TYPE, "application/octet-stream");
+			metadata.set("strings:command", config.getStringsCommand());
+			metadata.set("strings:options", config.getOptions());
+			metadata.set("strings:file_output", doFile(input));
+
+			// Content
+			XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+					metadata);
+
+			xhtml.startDocument();
+
+			int totalBytes = doStrings(input, config, xhtml);
+
+			xhtml.endDocument();
+
+			// Metadata
+			metadata.set("strings:length", Integer.toString(totalBytes));
+		} finally {
+			tmp.dispose();
+		}
+	}
+
+	/**
+	 * Checks if the "strings" command is supported.
+	 *
+	 * @param config
+	 *            {@link StringsConfig} object used for testing the strings
+	 *            command.
+	 * @return {@code true} if the strings command is supported.
+	 */
+	private boolean hasStrings(StringsConfig config) {
+		String stringsProg = config.getStringsPath()
+				+ config.getStringsCommand();
+		String[] checkCmd = { stringsProg, "--version" };
+
+		return ExternalParser.check(checkCmd);
+	}
+
+	/**
+	 * Runs the "strings" command on the given file.
+	 * <p>
+	 * The configured timeout covers the whole run, including the time spent
+	 * reading the output: a watchdog thread destroys the process when the
+	 * timeout expires, which also unblocks the pending read.
+	 *
+	 * @param input
+	 *            {@link File} object that represents the file to parse.
+	 * @param config
+	 *            {@link StringsConfig} object including the strings
+	 *            configuration.
+	 * @param xhtml
+	 *            {@link XHTMLContentHandler} object.
+	 * @return the total number of characters read from the output of the
+	 *         strings command.
+	 * @throws IOException
+	 *             if any I/O error occurs.
+	 * @throws TikaException
+	 *             if the command timed out or the parsing process has been
+	 *             interrupted.
+	 * @throws SAXException
+	 *             if the content could not be written.
+	 */
+	private int doStrings(File input, StringsConfig config,
+			XHTMLContentHandler xhtml) throws IOException, TikaException,
+			SAXException {
+		String[] cmd = { config.getStringsPath() + config.getStringsCommand(),
+				input.getPath() };
+
+		final Process process = new ProcessBuilder(cmd).start();
+		final AtomicBoolean timedOut = new AtomicBoolean(false);
+		// Thread.sleep rejects negative values, so clamp to zero.
+		final long timeoutMillis = Math.max(0L,
+				TimeUnit.SECONDS.toMillis(config.getTimeout()));
+
+		Thread watchdog = new Thread(new Runnable() {
+			public void run() {
+				try {
+					Thread.sleep(timeoutMillis);
+				} catch (InterruptedException ie) {
+					// The command ended before the timeout expired.
+					return;
+				}
+				// Flag first, so that a read failing because of destroy()
+				// is reported as a timeout.
+				timedOut.set(true);
+				process.destroy();
+			}
+		});
+		watchdog.setDaemon(true);
+		watchdog.start();
+
+		int totalBytes = 0;
+
+		try {
+			// The command reads the file given as argument, not its stdin.
+			process.getOutputStream().close();
+
+			// Reads content printed out by "strings" command
+			totalBytes = extractOutput(process.getInputStream(), xhtml);
+			process.waitFor();
+
+		} catch (IOException ioe) {
+			if (timedOut.get()) {
+				throw new TikaException(StringsParser.class.getName()
+						+ " timeout", ioe);
+			}
+			throw ioe;
+
+		} catch (InterruptedException ie) {
+			Thread.currentThread().interrupt();
+			throw new TikaException(StringsParser.class.getName()
+					+ " interrupted", ie);
+
+		} finally {
+			watchdog.interrupt();
+			// The process must not outlive the parse; no-op if it exited.
+			process.destroy();
+		}
+
+		if (timedOut.get()) {
+			throw new TikaException(StringsParser.class.getName() + " timeout",
+					new TimeoutException());
+		}
+
+		return totalBytes;
+	}
+
+	/**
+	 * Copies the output of the "strings" command to the content handler.
+	 *
+	 * @param stream
+	 *            {@link InputStream} object connected to the output of the
+	 *            command; it is closed before returning.
+	 * @param xhtml
+	 *            {@link XHTMLContentHandler} object.
+	 * @return the total number of characters read from the stream.
+	 * @throws SAXException
+	 *             if the content element could not be written.
+	 * @throws IOException
+	 *             if any I/O error occurs.
+	 */
+	private int extractOutput(InputStream stream, XHTMLContentHandler xhtml)
+			throws SAXException, IOException {
+		char[] buffer = new char[1024];
+		int totalBytes = 0;
+
+		// Created outside the try block so that it is never null in finally.
+		BufferedReader reader = new BufferedReader(new InputStreamReader(
+				stream, OUTPUT_CHARSET));
+
+		try {
+			int n;
+			while ((n = reader.read(buffer)) != -1) {
+				if (n > 0) {
+					xhtml.characters(buffer, 0, n);
+					totalBytes += n;
+				}
+			}
+		} finally {
+			reader.close();
+		}
+
+		return totalBytes;
+	}
+
+	/**
+	 * Runs the "file" command on the given file that aims at providing an
+	 * alternative way to determine the file type. This is a best-effort
+	 * operation: when the command is missing or its output cannot be read,
+	 * {@code null} is returned instead of failing the whole parse.
+	 *
+	 * @param input
+	 *            {@link File} object that represents the file to detect.
+	 * @return the file type provided by the "file" command using the "-b"
+	 *         option (it stands for "brief mode"), or {@code null} if it is
+	 *         not available.
+	 */
+	private String doFile(File input) {
+		String[] cmd = { "file", "-b", input.getPath() };
+		Process process;
+
+		try {
+			process = new ProcessBuilder(cmd).start();
+		} catch (IOException ignored) {
+			// The "file" command is not installed or cannot be started.
+			return null;
+		}
+
+		String fileOutput = null;
+
+		try {
+			// The command reads the file given as argument, not its stdin.
+			process.getOutputStream().close();
+
+			BufferedReader reader = new BufferedReader(new InputStreamReader(
+					process.getInputStream(), OUTPUT_CHARSET));
+			try {
+				fileOutput = reader.readLine();
+			} finally {
+				reader.close();
+			}
+		} catch (IOException ignored) {
+			// The file type is only auxiliary metadata: go on without it.
+		} finally {
+			process.destroy();
+		}
+
+		return fileOutput;
+	}
+}
