package gov.nist.core;

import java.text.ParseException;
import java.util.Hashtable;
import java.util.Vector;

/**
 * A lexical analyzer that is used by all parsers in our implementation.
 *
 * <p>The scanning primitives this class relies on ({@code buffer}, {@code ptr},
 * {@code savedPtr}, {@code lookAhead}, {@code consume}, {@code getNextChar},
 * {@code hasMoreChars}, {@code isAlpha}, {@code isDigit}) are inherited from
 * {@code StringTokenizer}.
 *
 * @author M. Ranganathan
 */
public class LexerCore extends StringTokenizer {

    // IMPORTANT - All keyword matches should be between START and END
    public static final int START = 2048;
    public static final int END = START + 2048;

    // IMPORTANT -- This should be < END
    public static final int ID = END - 1;
    public static final int SAFE = END - 2;

    // Individual token classes.
    public static final int WHITESPACE = END + 1;
    public static final int DIGIT = END + 2;
    public static final int ALPHA = END + 3;

    // Single-character tokens: the token type is the character code itself.
    public static final int BACKSLASH = '\\';
    public static final int QUOTE = '\'';
    public static final int AT = '@';
    public static final int SP = ' ';
    public static final int HT = '\t';
    public static final int COLON = ':';
    public static final int STAR = '*';
    public static final int DOLLAR = '$';
    public static final int PLUS = '+';
    public static final int POUND = '#';
    public static final int MINUS = '-';
    public static final int DOUBLEQUOTE = '\"';
    public static final int TILDE = '~';
    public static final int BACK_QUOTE = '`';
    public static final int NULL = '\0';
    public static final int EQUALS = '=';
    public static final int SEMICOLON = ';';
    public static final int SLASH = '/';
    public static final int L_SQUARE_BRACKET = '[';
    public static final int R_SQUARE_BRACKET = ']';
    public static final int R_CURLY = '}';
    public static final int L_CURLY = '{';
    public static final int HAT = '^';
    public static final int BAR = '|';
    public static final int DOT = '.';
    public static final int EXCLAMATION = '!';
    public static final int LPAREN = '(';
    public static final int RPAREN = ')';
    public static final int GREATER_THAN = '>';
    public static final int LESS_THAN = '<';
    public static final int PERCENT = '%';
    public static final int QUESTION = '?';
    public static final int AND = '&';
    public static final int UNDERSCORE = '_';

    /** Maps a keyword token type (Integer) to the first name registered for it. */
    protected static Hashtable globalSymbolTable = new Hashtable();

    /** Maps a lexer name (String) to that lexer's keyword table (Hashtable). */
    protected static Hashtable lexerTables = new Hashtable();

    /** Keyword table consulted by this lexer: keyword name to Integer token type. */
    protected Hashtable currentLexer;

    protected String currentLexerName;

    /** The token produced by the most recent successful {@link #match(int)}. */
    protected Token currentMatch;

    /**
     * Register a keyword in the current lexer table and, if its value has no
     * name yet, in the global value-to-name table.
     *
     * @param name the keyword as it will be looked up (lookups upper-case the
     *     scanned identifier first)
     * @param value the token type assigned to the keyword
     */
    protected void addKeyword(String name, int value) {
        Integer val = new Integer(value);
        currentLexer.put(name, val);
        if (!globalSymbolTable.containsKey(val)) {
            globalSymbolTable.put(val, name);
        }
    }

    /**
     * Translate a token type back to text.
     *
     * @param value a token type
     * @return the registered keyword name when {@code value > START} (or null
     *     if none is registered), otherwise the one-character string for
     *     {@code value} interpreted as a character code
     */
    public String lookupToken(int value) {
        if (value > START) {
            return (String) globalSymbolTable.get(new Integer(value));
        }
        return String.valueOf((char) value);
    }

    /**
     * Make the keyword table registered under the given name the current one,
     * creating and registering an empty table first if none exists.
     *
     * @param lexerName name of the lexer table
     * @return the table that is now current
     */
    protected Hashtable addLexer(String lexerName) {
        currentLexer = (Hashtable) lexerTables.get(lexerName);
        if (currentLexer == null) {
            currentLexer = new Hashtable();
            lexerTables.put(lexerName, currentLexer);
        }
        return currentLexer;
    }

    /**
     * Record the name of the selected lexer. This base implementation only
     * stores the name; it does not switch {@link #currentLexer}.
     *
     * @param lexerName name of the lexer to select
     */
    public void selectLexer(String lexerName) {
        this.currentLexerName = lexerName;
    }

    /** Create a lexer named "charLexer" with an empty keyword table. */
    protected LexerCore() {
        this.currentLexer = new Hashtable();
        this.currentLexerName = "charLexer";
    }

    /**
     * Create a lexer with the given name and an empty keyword table.
     *
     * @param lexerName name of the lexer
     */
    public LexerCore(String lexerName) {
        this();
        this.currentLexerName = lexerName;
    }

    /**
     * Initialize the lexer with a buffer.
     *
     * @param lexerName name of the lexer
     * @param buffer the text to scan
     */
    public LexerCore(String lexerName, String buffer) {
        this(lexerName);
        this.buffer = buffer;
    }

    /**
     * Peek the next id but don't move the buffer pointer forward. The position
     * just past the id is remembered in {@code savedPtr}.
     *
     * @return the next id (possibly empty)
     */
    public String peekNextId() {
        int start = ptr;
        String id = ttoken();
        savedPtr = ptr;
        ptr = start;
        return id;
    }

    /**
     * Get the next id, consuming it.
     *
     * @return the next id (possibly empty)
     */
    public String getNextId() {
        return ttoken();
    }

    /**
     * Return the token produced by the last call to {@link #match(int)}.
     * Call this after you call match.
     *
     * @return the current match, or null if nothing has been matched yet
     */
    public Token getNextToken() {
        return this.currentMatch;
    }

    /**
     * Look ahead for one token without consuming it.
     *
     * @return the next token
     * @throws ParseException if the token cannot be read
     */
    public Token peekNextToken() throws ParseException {
        return (Token) peekNextToken(1).elementAt(0);
    }

    /**
     * Look ahead for a number of tokens without consuming them. On success the
     * position just past the last peeked token is remembered in
     * {@code savedPtr}.
     *
     * @param ntokens how many tokens to peek
     * @return a Vector of {@code ntokens} Token objects
     * @throws ParseException if a token cannot be read; the read pointer is
     *     restored in that case as well
     */
    public Vector peekNextToken(int ntokens) throws ParseException {
        int start = ptr;
        Vector tokens = new Vector();
        try {
            for (int i = 0; i < ntokens; i++) {
                Token tok = new Token();
                if (startsId()) {
                    String id = ttoken();
                    tok.tokenValue = id;
                    // Hashtable never stores null values, so a null result
                    // means the id is not a keyword of this lexer.
                    Integer type = (Integer) currentLexer.get(id.toUpperCase());
                    tok.tokenType = (type != null) ? type.intValue() : ID;
                } else {
                    char nextChar = getNextChar();
                    tok.tokenValue = String.valueOf(nextChar);
                    if (isAlpha(nextChar)) {
                        tok.tokenType = ALPHA;
                    } else if (isDigit(nextChar)) {
                        tok.tokenType = DIGIT;
                    } else {
                        tok.tokenType = (int) nextChar;
                    }
                }
                tokens.addElement(tok);
            }
            savedPtr = ptr;
            return tokens;
        } finally {
            // A peek must never consume input, even when it fails part way.
            ptr = start;
        }
    }

    /**
     * Match the given token or throw an exception if no such token can be
     * matched.
     *
     * <p>Token types strictly between START and END are ID, SAFE or a keyword
     * of the current lexer; DIGIT and ALPHA match one character of that class;
     * any other value at or below START (and END itself) is treated as the
     * code of the literal character to match.
     *
     * <p>NOTE(review): a type above END other than DIGIT or ALPHA (for example
     * WHITESPACE) matches nothing, consumes nothing and returns the previous
     * match unchanged. That looks unintended but is preserved here.
     *
     * @param tok the token type to match
     * @return the matched token
     * @throws ParseException if the input does not start with the token
     */
    public Token match(int tok) throws ParseException {
        if (Debug.parserDebug) {
            Debug.println("match " + tok);
        }
        if (tok > START && tok < END) {
            matchIdOrKeyword(tok);
        } else if (tok > END) {
            matchCharacterClass(tok);
        } else {
            matchCharacter(tok);
        }
        return this.currentMatch;
    }

    /** Match a generic ID, a SAFE token, or a keyword of the current lexer. */
    private void matchIdOrKeyword(int tok) throws ParseException {
        if (tok == ID) {
            // Generic ID sought.
            if (!startsId()) {
                throw new ParseException(buffer + "\nID expected", ptr);
            }
            recordMatch(getNextId(), ID);
        } else if (tok == SAFE) {
            if (!startsSafeToken()) {
                throw new ParseException(buffer + "\nID expected", ptr);
            }
            recordMatch(ttokenSafe(), SAFE);
        } else {
            String nexttok = getNextId();
            Integer cur = (Integer) currentLexer.get(nexttok.toUpperCase());
            if (cur == null || cur.intValue() != tok) {
                throw new ParseException(
                    buffer + "\nUnexpected Token : " + nexttok, ptr);
            }
            recordMatch(nexttok, tok);
        }
    }

    /** Match a single character of the DIGIT or ALPHA class. */
    private void matchCharacterClass(int tok) throws ParseException {
        char next = lookAhead(0);
        if (tok == DIGIT) {
            if (!isDigit(next)) {
                throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
            }
        } else if (tok == ALPHA) {
            if (!isAlpha(next)) {
                throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
            }
        } else {
            // Unsupported class: leave the previous match in place.
            return;
        }
        recordMatch(String.valueOf(next), tok);
        consume(1);
    }

    /** Match the literal character whose code is the given token type. */
    private void matchCharacter(int tok) throws ParseException {
        char expected = (char) tok;
        char next = lookAhead(0);
        if (next != expected) {
            throw new ParseException(
                buffer + "\nExpecting >>>" + expected
                    + "<<< got >>>" + next + "<<<",
                ptr);
        }
        recordMatch(String.valueOf(expected), tok);
        consume(1);
    }

    /** Store a new current match with the given text and type. */
    private void recordMatch(String value, int type) {
        Token matched = new Token();
        matched.tokenValue = value;
        matched.tokenType = type;
        this.currentMatch = matched;
    }

    /** Skip any run of spaces and horizontal tabs at the read pointer. */
    public void SPorHT() {
        try {
            while (lookAhead(0) == ' ' || lookAhead(0) == '\t') {
                consume(1);
            }
        } catch (ParseException ex) {
            // Ignore: there is nothing left to skip.
        }
    }

    /**
     * Characters shared by every token flavour scanned by this class:
     * letters, digits and {@code _ + - ! ` ' ~ . *}.
     */
    private boolean isCommonTokenChar(char ch) {
        return isAlpha(ch)
            || isDigit(ch)
            || ch == '_'
            || ch == '+'
            || ch == '-'
            || ch == '!'
            || ch == '`'
            || ch == '\''
            || ch == '~'
            || ch == '.'
            || ch == '*';
    }

    /** Identifier characters: the common set plus '%' (bug fix by Bruno Konik). */
    private boolean isIdChar(char ch) {
        return isCommonTokenChar(ch) || ch == '%';
    }

    /** "Safe" token characters: the common set plus a range of punctuation. */
    private boolean isSafeChar(char ch) {
        return isCommonTokenChar(ch)
            || ch == '/'
            || ch == '}'
            || ch == '{'
            || ch == ']'
            || ch == '['
            || ch == '^'
            || ch == '|'
            || ch == '#'
            || ch == '@'
            || ch == '$'
            || ch == ':'
            || ch == ';'
            || ch == '?'
            || ch == '\"';
    }

    /**
     * Test whether an identifier can start at the read pointer.
     *
     * @return true if the next character is an identifier character; false
     *     otherwise or if the look-ahead fails
     */
    public boolean startsId() {
        try {
            return isIdChar(lookAhead(0));
        } catch (ParseException ex) {
            return false;
        }
    }

    /**
     * Test whether a "safe" token can start at the read pointer.
     *
     * @return true if the next character is a safe-token character; false
     *     otherwise or if the look-ahead fails
     */
    public boolean startsSafeToken() {
        try {
            return isSafeChar(lookAhead(0));
        } catch (ParseException ex) {
            return false;
        }
    }

    /**
     * Consume the longest run of identifier characters at the read pointer.
     *
     * @return the consumed text (empty if none)
     */
    public String ttoken() {
        StringBuffer nextId = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                if (!isIdChar(nextChar)) {
                    break;
                }
                consume(1);
                nextId.append(nextChar);
            }
        } catch (ParseException ex) {
            // Return whatever was collected before the failure.
        }
        return nextId.toString();
    }

    /**
     * Like {@link #ttoken()} but also accepts spaces and tabs, and does not
     * accept '%'.
     *
     * @return the consumed text (empty if none)
     */
    public String ttokenAllowSpace() {
        StringBuffer nextId = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                boolean accepted = isCommonTokenChar(nextChar)
                    || nextChar == ' '
                    || nextChar == '\t';
                if (!accepted) {
                    break;
                }
                nextId.append(nextChar);
                consume(1);
            }
        } catch (ParseException ex) {
            // Return whatever was collected before the failure.
        }
        return nextId.toString();
    }

    /**
     * Consume the longest run of "safe" token characters at the read pointer.
     *
     * @return the consumed text (empty if none)
     */
    public String ttokenSafe() {
        StringBuffer nextId = new StringBuffer();
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                if (!isSafeChar(nextChar)) {
                    break;
                }
                nextId.append(nextChar);
                consume(1);
            }
        } catch (ParseException ex) {
            // Return whatever was collected before the failure.
        }
        return nextId.toString();
    }

    /**
     * Parse a quoted string; the cursor is expected to be at the opening ".
     * Characters are read up to and including the closing ". A backslash and
     * the character following it are both kept verbatim in the result.
     *
     * @return the text between the quotes (escapes not decoded), or null if
     *     the cursor is not at a "
     * @throws ParseException if a NUL character is met before the closing
     *     quote, including directly after a backslash
     */
    public String quotedString() throws ParseException {
        if (lookAhead(0) != '\"') {
            return null;
        }
        consume(1);
        StringBuffer retval = new StringBuffer();
        while (true) {
            char next = getNextChar();
            if (next == '\"') {
                // Got to the terminating quote.
                break;
            } else if (next == '\0') {
                throw new ParseException(
                    this.buffer + " :unexpected EOL", this.ptr);
            } else if (next == '\\') {
                retval.append(next);
                next = getNextChar();
                // Same guard as comment(): an escape must not swallow the
                // end-of-input marker.
                if (next == '\0') {
                    throw new ParseException(
                        this.buffer + " :unexpected EOL", this.ptr);
                }
                retval.append(next);
            } else {
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /**
     * Parse a comment string; the cursor is expected to be at the opening "(".
     * Characters are read up to and including the first unescaped ")". A
     * backslash and the character following it are both kept verbatim.
     *
     * @return the text between the parentheses, or null if the cursor is not
     *     at a "("
     * @throws ParseException if a NUL character is met before the closing
     *     parenthesis
     */
    public String comment() throws ParseException {
        if (lookAhead(0) != '(') {
            return null;
        }
        consume(1);
        StringBuffer retval = new StringBuffer();
        while (true) {
            char next = getNextChar();
            if (next == ')') {
                break;
            } else if (next == '\0') {
                throw new ParseException(
                    this.buffer + " :unexpected EOL", this.ptr);
            } else if (next == '\\') {
                retval.append(next);
                next = getNextChar();
                if (next == '\0') {
                    throw new ParseException(
                        this.buffer + " : unexpected EOL", this.ptr);
                }
                retval.append(next);
            } else {
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /**
     * Consume and return the text up to (not including) the next semicolon,
     * comma, newline or NUL character.
     *
     * @return a substring containing no semicolons.
     */
    public String byteStringNoSemicolon() {
        StringBuffer retval = new StringBuffer();
        try {
            while (true) {
                char next = lookAhead(0);
                // bug fix from Ben Evans.
                if (next == '\0' || next == '\n' || next == ';' || next == ',') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Return whatever was collected before the failure.
        }
        return retval.toString();
    }

    /**
     * Consume and return the text up to (not including) the next comma,
     * newline or NUL character.
     *
     * @return a substring containing no commas.
     */
    public String byteStringNoComma() {
        StringBuffer retval = new StringBuffer();
        try {
            while (true) {
                char next = lookAhead(0);
                // '\0' is treated as end of input, as in
                // byteStringNoSemicolon(); without it the scan would never
                // stop on input that has no comma or newline left.
                if (next == '\0' || next == '\n' || next == ',') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Return whatever was collected before the failure.
        }
        return retval.toString();
    }

    /**
     * Convert a character to a one-character string.
     *
     * @param ch the character
     * @return the string form of {@code ch}
     */
    public static String charAsString(char ch) {
        return String.valueOf(ch);
    }

    /**
     * Lookahead in the inputBuffer for n chars and return as a string.
     * Do not consume the input.
     *
     * @param nchars number of characters to look at
     * @return the characters seen; shorter than {@code nchars} if a
     *     look-ahead fails
     */
    public String charAsString(int nchars) {
        StringBuffer retval = new StringBuffer();
        try {
            for (int i = 0; i < nchars; i++) {
                retval.append(lookAhead(i));
            }
        } catch (ParseException ex) {
            // Return whatever was collected before the failure.
        }
        return retval.toString();
    }

    /**
     * Get and consume the next number.
     *
     * @return a substring corresponding to a number (i.e. a non-empty
     *     sequence of digits)
     * @throws ParseException if the input does not start with a digit
     */
    public String number() throws ParseException {
        // This check sits outside the try below on purpose: previously the
        // exception was thrown inside it and immediately swallowed, so callers
        // silently received an empty string instead of an error.
        char first = lookAhead(0);
        if (!isDigit(first)) {
            throw new ParseException(
                buffer + ": Unexpected token at " + first, ptr);
        }
        StringBuffer digits = new StringBuffer();
        try {
            char next = first;
            do {
                digits.append(next);
                consume(1);
                next = lookAhead(0);
            } while (isDigit(next));
        } catch (ParseException ex) {
            // A failure while scanning further ends the number; the digits
            // read so far are still valid.
        }
        return digits.toString();
    }

    /**
     * Mark the position for backtracking.
     *
     * @return the current location of the pointer.
     */
    public int markInputPosition() {
        return ptr;
    }

    /**
     * Rewind the input ptr to the marked position.
     *
     * @param position the position to rewind the parser to.
     */
    public void rewindInputPosition(int position) {
        this.ptr = position;
    }

    /**
     * Get the rest of the String.
     *
     * @return the rest of the buffer, or null if the read pointer is at or
     *     past the end of the buffer
     */
    public String getRest() {
        if (ptr >= buffer.length()) {
            return null;
        }
        return buffer.substring(ptr);
    }

    /**
     * Get the sub-String until the character is encountered. The terminating
     * character is consumed but not returned. A backslash escapes the next
     * character: the backslash is dropped and the next character is kept, even
     * if it equals {@code c}.
     *
     * @param c the character to match
     * @return the substring that matches.
     * @throws ParseException if a NUL character is seen before {@code c}
     */
    public String getString(char c) throws ParseException {
        StringBuffer retval = new StringBuffer();
        while (true) {
            char next = lookAhead(0);
            if (next == '\0') {
                throw new ParseException(
                    this.buffer + "unexpected EOL", this.ptr);
            } else if (next == c) {
                consume(1);
                break;
            } else if (next == '\\') {
                consume(1);
                char escaped = lookAhead(0);
                if (escaped == '\0') {
                    throw new ParseException(
                        this.buffer + "unexpected EOL", this.ptr);
                }
                consume(1);
                retval.append(escaped);
            } else {
                consume(1);
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /**
     * Get the read pointer.
     *
     * @return the current read position
     */
    public int getPtr() {
        return this.ptr;
    }

    /**
     * Get the buffer.
     *
     * @return the text being scanned
     */
    public String getBuffer() {
        return this.buffer;
    }

    /**
     * Create a parse exception.
     *
     * @return a ParseException whose message is the buffer and whose error
     *     offset is the current read pointer
     */
    public ParseException createParseException() {
        return new ParseException(this.buffer, this.ptr);
    }
}
/*
 * $Log: LexerCore.java,v $
 * Revision 1.1  2005/10/05 20:54:21  mranga
 *
 * Added
 *
 * Revision 1.11  2004/10/21 14:57:16  mranga
 * Reviewed by:   mranga
 *
 * Fixed origin field parser for sdp.
 *
 * Revision 1.10  2004/08/10 23:15:50  mranga
 * Reviewed by:   mranga
 * improved error message
 *
 * Revision 1.9  2004/03/10 03:37:25  mranga
 * Submitted by:  Ben Evans
 * Reviewed by:   mranga
 * Bug in parsing of via header when there is a number of comma separated headers.
 *
 * Revision 1.8  2004/02/06 20:15:55  mranga
 * Submitted by:  Bruno Konik
 * Reviewed by:   mranga
 * Fixed character sets for parsing of urls and methods.
 *
 * Revision 1.7  2004/01/22 14:23:45  mranga
 * Reviewed by:   mranga
 * Fixed some minor formatting issues.
 *
 * Revision 1.6  2004/01/22 13:26:27  sverker
 * Issue number:
 * Obtained from:
 * Submitted by:  sverker
 * Reviewed by:   mranga
 *
 * Major reformat of code to conform with style guide. Resolved compiler and javadoc warnings. Added CVS tags.
 *
 * CVS: ----------------------------------------------------------------------
 * CVS: Issue number:
 * CVS:   If this change addresses one or more issues,
 * CVS:   then enter the issue number(s) here.
 * CVS: Obtained from:
 * CVS:   If this change has been taken from another system,
 * CVS:   then name the system in this line, otherwise delete it.
 * CVS: Submitted by:
 * CVS:   If this code has been contributed to the project by someone else; i.e.,
 * CVS:   they sent us a patch or a set of diffs, then include their name/email
 * CVS:   address here. If this is your work then delete this line.
 * CVS: Reviewed by:
 * CVS:   If we are doing pre-commit code reviews and someone else has
 * CVS:   reviewed your changes, include their name(s) here.
 * CVS:   If you have not had it reviewed then delete this line.
 *
 */