*** nutch.bak/src/java/org/apache/nutch/net/URLFilters.java	Thu Oct 12 06:48:42 2006
--- nutch/src/java/org/apache/nutch/net/URLFilters.java	Sat Nov 25 13:22:58 2006
***************
*** 76,85 ****
  
    /** Run all defined filters. Assume logical AND. */
    public String filter(String urlString) throws URLFilterException {
      for (int i = 0; i < this.filters.length; i++) {
!       if (urlString == null)
          return null;
!       urlString = this.filters[i].filter(urlString);
      }
      return urlString;
    }
--- 76,94 ----
  
    /** Run all defined filters. Assume logical AND. */
    public String filter(String urlString) throws URLFilterException {
+     if (urlString == null) { return null; }  // keep the pre-patch guard: no filter is ever handed a null URL
      for (int i = 0; i < this.filters.length; i++) {
!       String tmp = this.filters[i].filter(urlString);
!       if (tmp == null) {                     // this filter rejected the URL
          return null;
!       }
!       else if (tmp.equals("_PASS_")) {
!         // "_PASS_" is a magic cookie that short-circuits the remaining tests.
!         return urlString;
!       }
!       else {
!         urlString = tmp;                     // feed this filter's output to the next filter
!       }
      }
      return urlString;
    }
*** nutch.bak/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java	Thu Oct 12 06:48:42 2006
--- nutch/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java	Sat Nov 25 13:23:18 2006
***************
*** 14,20 ****
   * limitations under the License.
   */
  
! // $Id: PrefixURLFilter.java,v 1.2 2005/02/07 19:10:37 cutting Exp $
  
  package org.apache.nutch.urlfilter.prefix;
  
--- 14,20 ----
   * limitations under the License.
   */
  
! //$Id: PrefixURLFilter.java,v 1.2 2005/02/07 19:10:37 cutting Exp $
  
  package org.apache.nutch.urlfilter.prefix;
  
***************
*** 53,60 ****
  
    // read in attribute "file" of this plugin.
    private static String attributeFile = null;
  
!   private TrieStringMatcher trie;
  
    private Configuration conf;
  
--- 53,62 ----
  
    // read in attribute "file" of this plugin.
    private static String attributeFile = null;
+   private static final String shortCircuitPrefix = "SHORTCIRCUIT:";  // config-line token marking a short-circuit prefix
  
!   private TrieStringMatcher trie;					// "regular" trie
!   private TrieStringMatcher scTrie = null;		// trie for short-circuited matches
  
    private Configuration conf;
  
***************
*** 63,83 ****
    }
  
    public PrefixURLFilter(String filename) throws IOException {
!     trie = readConfigurationFile(new FileReader(filename));
    }
  
    public String filter(String url) {
!     if (trie.shortestMatch(url) == null)
!       return null;
!     else
!       return url;
    }
  
!   private TrieStringMatcher readConfigurationFile(Reader reader)
      throws IOException {
      
      BufferedReader in=new BufferedReader(reader);
      List urlprefixes = new ArrayList();
      String line;
  
      while((line=in.readLine())!=null) {
--- 65,86 ----
    }
  
    public PrefixURLFilter(String filename) throws IOException {
!     readConfigurationFile(new FileReader(filename)); // NOTE(review): this FileReader is not closed here; confirm readConfigurationFile closes it
    }
  
    public String filter(String url) {
!     // A short-circuit prefix hit answers "_PASS_"; otherwise the regular trie accepts or rejects.
!     boolean shortCircuited = (scTrie != null) && (scTrie.shortestMatch(url) != null);
!     if (shortCircuited) { return "_PASS_"; }
!     return (trie.shortestMatch(url) != null) ? url : null;
    }
  
!   private void readConfigurationFile(Reader reader)
    throws IOException {
  
      BufferedReader in=new BufferedReader(reader);
      List urlprefixes = new ArrayList();
+     List scurlprefixes = new ArrayList();
      String line;
  
      while((line=in.readLine())!=null) {
***************
*** 89,99 ****
        case ' ' : case '\n' : case '#' :           // skip blank & comment lines
          continue;
        default :
  	urlprefixes.add(line);
        }
      }
  
!     return new PrefixStringMatcher(urlprefixes);
    }
  
    public static void main(String args[])
--- 92,116 ----
        case ' ' : case '\n' : case '#' :           // skip blank & comment lines
          continue;
        default :
+         // startsWith() is safe for lines shorter than the token; subSequence(0, n) would throw on them.
+         if (line.startsWith(shortCircuitPrefix)) {
+           line = line.substring(shortCircuitPrefix.length());  // keep only the prefix that follows the token
+           if (LOG.isInfoEnabled()) {
+             LOG.info("adding short circuit prefix " + line);
+           }
+           scurlprefixes.add(line);
+         }
+         else {
+           if (LOG.isInfoEnabled()) {
+             LOG.info("adding regular prefix " + line);
+           }
            urlprefixes.add(line);
          }
        }
+     }
  
!     trie = new PrefixStringMatcher(urlprefixes);
!     scTrie = new PrefixStringMatcher(scurlprefixes);
    }
  
    public static void main(String args[])
***************
*** 130,164 ****
      }
      if (attributeFile != null && attributeFile.trim().equals(""))
        attributeFile = null;
!     if (attributeFile != null) {
        if (LOG.isInfoEnabled()) {
          LOG.info("Attribute \"file\" is defined for plugin " + pluginName
              + " as " + attributeFile);
        }
-     } else {
-       // if (LOG.isWarnEnabled()) {
-       //   LOG.warn("Attribute \"file\" is not defined in plugin.xml for
-       //   plugin "+pluginName);
-       // }
      }
  
      String file = conf.get("urlfilter.prefix.file");
      // attribute "file" takes precedence if defined
      if (attributeFile != null)
        file = attributeFile;
      Reader reader = conf.getConfResourceAsReader(file);
  
      if (reader == null) {
        trie = new PrefixStringMatcher(new String[0]);
      } else {
        try {
!         trie = readConfigurationFile(reader);
        } catch (IOException e) {
          if (LOG.isFatalEnabled()) { LOG.fatal(e.getMessage()); }
          // TODO mb@media-style.com: throw Exception? Because broken api.
          throw new RuntimeException(e.getMessage(), e);
        }
      }
    }
  
    public Configuration getConf() {
--- 147,180 ----
      }
      if (attributeFile != null && attributeFile.trim().equals(""))
        attributeFile = null;
! 
      if (LOG.isInfoEnabled()) {
+       if (attributeFile != null) {
          LOG.info("Attribute \"file\" is defined for plugin " + pluginName
              + " as " + attributeFile);
        }
      }
  
      String file = conf.get("urlfilter.prefix.file");
      // attribute "file" takes precedence if defined
      if (attributeFile != null)
        file = attributeFile;
+ 
      Reader reader = conf.getConfResourceAsReader(file);
  
      if (reader == null) {
        trie = new PrefixStringMatcher(new String[0]);
      } else {
        try {
!         readConfigurationFile(reader);
        } catch (IOException e) {
          if (LOG.isFatalEnabled()) { LOG.fatal(e.getMessage()); }
          // TODO mb@media-style.com: throw Exception? Because broken api.
          throw new RuntimeException(e.getMessage(), e);
        }
      }
+ 
+ 
    }
  
    public Configuration getConf() {
