liuslite.parser
Class Parser

java.lang.Object
  extended by liuslite.parser.Parser
Direct Known Subclasses:
HtmlParser, MsExcelParser, MsPowerPointParser, MsWordParser, OpenOfficeParser, PDFParser, RTFParser, TXTParser, XMLParser

public abstract class Parser
extends java.lang.Object

Abstract class Parser

Author:
Rida Benjelloun (ridabenjelloun@apache.org)

Constructor Summary
Parser()
           
 
Method Summary
 void configure(LiusConfig config)
          Configure parsers from MIME types
abstract  Content getContent(java.lang.String name)
          Get a content object; this object is configured from the LiusConfig XML.
abstract  java.util.List<Content> getContents()
          Get a list of content objects; these objects are configured from the LiusConfig XML file.
 java.io.InputStream getInputStream()
           
 java.lang.String getMimeType()
          Get document mime type
 ParserConfig getParserConfig()
          Return the parser-specific config
abstract  java.lang.String getStrContent()
          Get the string content of the document
 void setInputStream(java.io.InputStream is)
           
 void setMimeType(java.lang.String mimeType)
          Set document mime type
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

Parser

public Parser()
Method Detail

setInputStream

public void setInputStream(java.io.InputStream is)

getInputStream

public java.io.InputStream getInputStream()

configure

public void configure(LiusConfig config)
               throws LiusException
Configure parsers from MIME types

Throws:
LiusException

getParserConfig

public ParserConfig getParserConfig()
Return the parser-specific config


getMimeType

public java.lang.String getMimeType()
Get document mime type


setMimeType

public void setMimeType(java.lang.String mimeType)
Set document mime type


getStrContent

public abstract java.lang.String getStrContent()
Get the string content of the document


getContent

public abstract Content getContent(java.lang.String name)
Get a content object; this object is configured from the LiusConfig XML. It could be document metadata, an XPath selection, a regex selection, or full text.


getContents

public abstract java.util.List<Content> getContents()
Get a list of content objects; these objects are configured from the LiusConfig XML file. Each could be document metadata, an XPath selection, a regex selection, or full text.