*
+ * ------------------------- */
+
+ private void readPlainContent(URL url) throws IOException {
+ String page = HttpWebClient.getHtmlPage(url.toString(), conf);
+
+ content = page.getBytes("UTF-8");
+ }
+
+ private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {
+ readLine(in, line, false);
+
+ int codeStart = line.indexOf(" ");
+ int codeEnd = line.indexOf(" ", codeStart + 1);
+
+ // handle lines with no plaintext result code, ie:
+ // "HTTP/1.1 200" vs "HTTP/1.1 200 OK"
+ if (codeEnd == -1)
+ codeEnd = line.length();
+
+ int code;
+ try {
+ code = Integer.parseInt(line.substring(codeStart + 1, codeEnd));
+ } catch (NumberFormatException e) {
+ throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e);
+ }
+
+ return code;
+ }
+
+ private void processHeaderLine(StringBuffer line) throws IOException, HttpException {
+
+ int colonIndex = line.indexOf(":"); // key is up to colon
+ if (colonIndex == -1) {
+ int i;
+ for (i = 0; i < line.length(); i++)
+ if (!Character.isWhitespace(line.charAt(i)))
+ break;
+ if (i == line.length())
+ return;
+ throw new HttpException("No colon in header:" + line);
+ }
+ String key = line.substring(0, colonIndex);
+
+ int valueStart = colonIndex + 1; // skip whitespace
+ while (valueStart < line.length()) {
+ int c = line.charAt(valueStart);
+ if (c != ' ' && c != '\t')
+ break;
+ valueStart++;
+ }
+ String value = line.substring(valueStart);
+ headers.set(key, value);
+ }
+
+ // Adds headers to our headers Metadata; keeps reading lines until readLine returns 0.
+ private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {
+ // NOTE(review): the "if (((pos = ..." line below breaks off mid-expression -- the rest of this method and the start of the following line-reading routine look lost in extraction; restore from the upstream file before editing.
+ while (readLine(in, line, true) != 0) {
+
+ // handle HTTP responses with missing blank line after headers
+ int pos;
+ if (((pos = line.indexOf(" 0) {
+ // at EOL -- check for continued line if the current
+ // (possibly continued) line wasn't blank
+ if (allowContinuedLine)
+ switch (peek(in)) {
+ case ' ':
+ case '\t': // line is continued
+ in.read(); // consume the space/tab that peek() just reported
+ continue;
+ }
+ }
+ return line.length(); // else complete
+ default:
+ line.append((char) c); // any other character is accumulated into the line buffer
+ }
+ }
+ throw new EOFException();
+ }
+
+ /**
+  * Returns the next byte of the stream without consuming it.
+  *
+  * @param in stream to look ahead in
+  * @return the next byte as a value in 0..255, or -1 at end of stream
+  * @throws IOException if reading or pushing back the byte fails
+  */
+ private static int peek(PushbackInputStream in) throws IOException {
+   int value = in.read();
+   // read() reports end of stream as -1. unread(-1) would store the byte 0xFF,
+   // so a later read() would see a phantom 255 instead of end of stream.
+   if (value != -1) {
+     in.unread(value);
+   }
+   return value;
+ }
+}
Index: src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package.html
===================================================================
--- src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package.html (revision 0)
+++ src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/package.html (working copy)
@@ -0,0 +1,5 @@
+<html>
+<body>
+<p>Protocol plugin which supports retrieving documents via Selenium.</p>
+</body>
+</html>
Index: src/plugin/protocol-selenium/src/target/classes/org/apache/nutch/protocol/htmlunit/package.html
===================================================================
--- src/plugin/protocol-selenium/src/target/classes/org/apache/nutch/protocol/htmlunit/package.html (revision 0)
+++ src/plugin/protocol-selenium/src/target/classes/org/apache/nutch/protocol/htmlunit/package.html (working copy)
@@ -0,0 +1,5 @@
+<html>
+<body>
+<p>Protocol plugin which supports retrieving documents via HtmlUnit.</p>
+</body>
+</html>