/** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.parse.html; import junit.framework.TestCase; import org.apache.nutch.parse.html.RobotsMetaProcessor.*; import java.io.ByteArrayInputStream; import java.net.URL; import org.cyberneko.html.parsers.*; import org.xml.sax.*; import org.w3c.dom.*; import org.apache.html.dom.*; /** Unit tests for RobotsMetaProcessor. */ public class TestRobotsMetaProcessor extends TestCase { public TestRobotsMetaProcessor(String name) { super(name); } /* some sample tags: */ public static String[] tests= { "test page" + " " + " " + "" + " some text" + "", "test page" + " " + " " + "" + " some text" + "", "test page" + " " + " " + "" + " some text" + "", "test page" + " " + "" + " some text" + "", "test page" + " " + "" + " some text" + "", "test page" + " " + "" + " some text" + "", "test page" + " " + "" + " some text" + "", "test page" + " " + "" + "" + " some text" + "", "test page" + " " + "" + "" + " some text" + "", }; public static final boolean[][] answers= { {true, true, true}, // NONE {false, false, true}, // all {true, true, true}, // nOnE {true, true, false}, // none {true, true, false}, // noindex,nofollow {true, false, false}, // noindex,follow {false, true, false}, // index,nofollow {false, false, false}, // index,follow {false, false, false}, // missing! }; private URL[][] currURLsAndAnswers; public void testRobotsMetaProcessor() { DOMFragmentParser parser= new DOMFragmentParser();; try { currURLsAndAnswers= new URL[][] { {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org"), null}, {new URL("http://www.nutch.org/foo/"), new URL("http://www.nutch.org/")}, {new URL("http://www.nutch.org"), new URL("http://www.nutch.org/base/")} }; } catch (Exception e) { assertTrue("couldn't make test URLs!", false); } for (int i= 0; i < tests.length; i++) { byte[] bytes= tests[i].getBytes(); DocumentFragment node = new HTMLDocumentImpl().createDocumentFragment(); try { parser.parse(new InputSource(new ByteArrayInputStream(bytes)), node); } catch (Exception e) { e.printStackTrace(); } RobotsMetaIndicator robotsMeta= new RobotsMetaIndicator(); RobotsMetaProcessor.getRobotsMetaDirectives(robotsMeta, node, currURLsAndAnswers[i][0]); assertTrue("got index wrong on test " + i, robotsMeta.getNoIndex() == answers[i][0]); assertTrue("got follow wrong on test " + i, robotsMeta.getNoFollow() == answers[i][1]); assertTrue("got cache wrong on test " + i, robotsMeta.getNoCache() == answers[i][2]); assertTrue("got base href wrong on test " + i + " (got " + robotsMeta.getBaseHref() + ")", ( (robotsMeta.getBaseHref() == null) && (currURLsAndAnswers[i][1] == null) ) || ( (robotsMeta.getBaseHref() != null) && robotsMeta.getBaseHref().equals( currURLsAndAnswers[i][1]) ) ); } } }