Index: src/test/org/apache/nutch/util/TestURLUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestURLUtil.java	(revision 923970)
+++ src/test/org/apache/nutch/util/TestURLUtil.java	(working copy)
@@ -213,4 +213,50 @@
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aSubDotCom, true));
   }
 
+  // Reference-resolution examples from RFC3986 section 5.4.1
+  private static String baseString = "http://a/b/c/d;p?q"; // base URL every target below is resolved against
+  private static String[][] targets = new String[][] { // each row: {relative reference, expected resolved URL}
+    // the RFC example {"g:h", "g:h"} is left out: unknown protocol
+    {"g"             ,  "http://a/b/c/g"},
+    { "./g"           ,  "http://a/b/c/g"},
+    { "g/"            ,  "http://a/b/c/g/"},
+    { "/g"            ,  "http://a/g"},
+    { "//g"           ,  "http://g"},
+    { "?y"            ,  "http://a/b/c/d;p?y"},
+    { "g?y"           ,  "http://a/b/c/g?y"},
+    { "#s"            ,  "http://a/b/c/d;p?q#s"},
+    { "g#s"           ,  "http://a/b/c/g#s"},
+    { "g?y#s"         ,  "http://a/b/c/g?y#s"},
+    { ";x"            ,  "http://a/b/c/;x"},
+    { "g;x"           ,  "http://a/b/c/g;x"},
+    { "g;x?y#s"       ,  "http://a/b/c/g;x?y#s"},
+    { ""              ,  "http://a/b/c/d;p?q"},
+    { "."             ,  "http://a/b/c/"},
+    { "./"            ,  "http://a/b/c/"},
+    { ".."            ,  "http://a/b/"},
+    { "../"           ,  "http://a/b/"},
+    { "../g"          ,  "http://a/b/g"},
+    { "../.."         ,  "http://a/"},
+    { "../../"        ,  "http://a/"},
+    { "../../g"       ,  "http://a/g"}
+  };
+
+  public void testResolveURL() throws Exception {
+    // NUTCH-436: a pure-query target replaces the query of the base, fragment dropped
+    URL u436 = new URL("http://a/b/c/d;p?q#f");
+    assertEquals("http://a/b/c/d;p?q#f", u436.toString());
+    URL abs = URLUtil.resolveURL(u436, "?y");
+    assertEquals("http://a/b/c/d;p?y", abs.toString());
+    // NUTCH-566: a pure-query target keeps the last path segment of the base
+    URL u566 = new URL("http://www.fleurie.org/entreprise.asp");
+    abs = URLUtil.resolveURL(u566, "?id_entrep=111");
+    assertEquals("http://www.fleurie.org/entreprise.asp?id_entrep=111", abs.toString());
+    URL base = new URL(baseString);
+    assertEquals("base url parsing", baseString, base.toString());
+    for (int i = 0; i < targets.length; i++) { // row = {relative reference, expected URL}
+      URL u = URLUtil.resolveURL(base, targets[i][0]);
+      assertEquals("resolving '" + targets[i][0] + "'", targets[i][1], u.toString());
+    }
+  }
+
 }
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 923970)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -26,7 +26,107 @@
 
 /** Utility class for URL analysis */
 public class URLUtil {
+  
+  /**
+   * Resolves a (possibly relative) target against a base URL, working around
+   * the way java.net.URL treats targets that consist of a query string only.
+   * Every other target is resolved by java.net.URL as is.
+   * @param base base url
+   * @param target target url (may be relative)
+   * @return resolved absolute url.
+   * @throws MalformedURLException if no URL can be built from base and target
+   */
+  public static URL resolveURL(URL base, String target)
+          throws MalformedURLException {
+    // Params embedded in the base url (';xxxx') used to be moved to the target
+    // at this point, so that the URL class constructs the new url properly:
+    //   if (base.toString().indexOf(';') > 0)
+    //     return fixEmbeddedParams(base, target);
+    // That step is disabled; it is probably not needed anymore - see NUTCH-797.
+    //
+    // What is still handled is a target that is a pure query. For example, the
+    // page http://careers3.accenture.com/Careers/ASPX/Search.aspx?co=0&sk=0
+    // has urls of the form href="?co=0&sk=0&pg=1", and by default URL
+    // constructs the base+target combo as
+    // http://careers3.accenture.com/Careers/ASPX/?co=0&sk=0&pg=1, incorrectly
+    // dropping the Search.aspx part of the path.
+    //
+    // Browsers handle these just fine, they must have an exception similar to
+    // this one.
+    //
+    // Anything that does not start with '?' needs no fix-up and goes straight
+    // to java.net.URL.
+    if (!target.startsWith("?")) {
+      return new URL(base, target);
+    }
 
+    return fixPureQueryTargets(base, target);
+  }
+
+  /**
+   * Resolves a pure-query target ("?...") while keeping the last path segment
+   * of base (RFC3986 section 5.4.1 example 7, and similar); any other target is
+   * passed to java.net.URL unchanged.
+   * @throws MalformedURLException if java.net.URL cannot build the result
+   */
+  static URL fixPureQueryTargets(URL base, String target)
+          throws MalformedURLException {
+    if (!target.startsWith("?")) return new URL(base, target);
+    String basePath = base.getPath();
+    int slash = basePath.lastIndexOf('/');
+    String baseRightMost = slash == -1 ? "" : basePath.substring(slash + 1);
+    // "./" keeps a ':' in the segment from being parsed as a URL scheme
+    String prefix = baseRightMost.indexOf(':') >= 0 ? "./" : "";
+    return new URL(base, prefix + baseRightMost + target);
+  }
+
+  /**
+   * Carries url params (';xxxx') that are embedded in the base url over to the
+   * target before the two are combined.
+   * <p>
+   * When the target already contains params they are taken as authoritative
+   * and the params of the base are ignored. When only the base contains
+   * params, they are copied into the target so that java.net.URL keeps them
+   * in the resolved url.
+   * 
+   * @param base
+   *          The base URL.
+   * @param target
+   *          The target path from the base URL.
+   * 
+   * @return URL A URL with the params information correctly encoded.
+   * 
+   * @throws MalformedURLException
+   *           If the url is not a well formed URL.
+   */
+  private static URL fixEmbeddedParams(URL base, String target)
+          throws MalformedURLException {
+
+    String baseText = base.toString();
+    int paramsAt = baseText.indexOf(';');
+
+    // nothing to carry over: the target brings its own params, or the base
+    // has none - plain java.net.URL resolution is enough
+    boolean targetHasParams = target.indexOf(';') >= 0;
+    if (targetHasParams || paramsAt == -1) {
+      return new URL(base, target);
+    }
+
+    // everything in the base from the first ';' on (this is the tail of
+    // base.toString(), so it also includes whatever follows the params there)
+    String baseParams = baseText.substring(paramsAt);
+
+    // params go after the path of the target but ahead of its query string,
+    // if it has one; otherwise they are simply appended
+    int queryAt = target.indexOf('?');
+    String resolved = (queryAt < 0)
+            ? target + baseParams
+            : target.substring(0, queryAt) + baseParams
+              + target.substring(queryAt);
+
+    return new URL(base, resolved);
+  }
+
   private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
 
   /** Returns the domain name of the url. The domain name of a url is
Index: src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java
===================================================================
--- src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java	(revision 923970)
+++ src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMContentUtils.java	(working copy)
@@ -26,6 +26,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.util.NodeWalker;
+import org.apache.nutch.util.URLUtil;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
@@ -300,51 +301,6 @@
   }
   
   /**
-   * Handles cases where the url param information is encoded into the base
-   * url as opposed to the target.
-   * <p>
-   * If the taget contains params (i.e. ';xxxx') information then the target 
-   * params information is assumed to be correct and any base params information
-   * is ignored.  If the base contains params information but the tareget does
-   * not, then the params information is moved to the target allowing it to be
-   * correctly determined by the java.net.URL class.
-   * 
-   * @param base The base URL.
-   * @param target The target path from the base URL.
-   * 
-   * @return URL A URL with the params information correctly encoded.
-   * 
-   * @throws MalformedURLException If the url is not a well formed URL.
-   */
-  private URL fixEmbeddedParams(URL base, String target) 
-    throws MalformedURLException{
-    
-    // the target contains params information or the base doesn't then no
-    // conversion necessary, return regular URL
-    if (target.indexOf(';') >= 0 || base.toString().indexOf(';') == -1) {
-      return new URL(base, target);
-    }
-    
-    // get the base url and it params information
-    String baseURL = base.toString();
-    int startParams = baseURL.indexOf(';');
-    String params = baseURL.substring(startParams);
-    
-    // if the target has a query string then put the params information after
-    // any path but before the query string, otherwise just append to the path
-    int startQS = target.indexOf('?');
-    if (startQS >= 0) {
-      target = target.substring(0, startQS) + params + 
-        target.substring(startQS);
-    }
-    else {
-      target += params;
-    }
-    
-    return new URL(base, target);
-  }
-
-  /**
    * This method finds all anchors below the supplied DOM
    * <code>node</code>, and creates appropriate {@link Outlink}
    * records for each (relative to the supplied <code>base</code>
@@ -400,8 +356,7 @@
             if (target != null && !noFollow && !post)
               try {
                 
-                URL url = (base.toString().indexOf(';') > 0) ? 
-                  fixEmbeddedParams(base, target) :  new URL(base, target);
+                URL url = URLUtil.resolveURL(base, target);
                 outlinks.add(new Outlink(url.toString(),
                                          linkText.toString().trim()));
               } catch (MalformedURLException e) {
Index: src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
===================================================================
--- src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java	(revision 923970)
+++ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java	(working copy)
@@ -26,6 +26,7 @@
 
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.util.NodeWalker;
+import org.apache.nutch.util.URLUtil;
 import org.apache.hadoop.conf.Configuration;
 
 import org.w3c.dom.*;
@@ -316,33 +317,6 @@
    * 
    * @throws MalformedURLException If the url is not a well formed URL.
    */
-  private URL fixEmbeddedParams(URL base, String target) 
-    throws MalformedURLException{
-    
-    // the target contains params information or the base doesn't then no
-    // conversion necessary, return regular URL
-    if (target.indexOf(';') >= 0 || base.toString().indexOf(';') == -1) {
-      return new URL(base, target);
-    }
-    
-    // get the base url and it params information
-    String baseURL = base.toString();
-    int startParams = baseURL.indexOf(';');
-    String params = baseURL.substring(startParams);
-    
-    // if the target has a query string then put the params information after
-    // any path but before the query string, otherwise just append to the path
-    int startQS = target.indexOf('?');
-    if (startQS >= 0) {
-      target = target.substring(0, startQS) + params + 
-        target.substring(startQS);
-    }
-    else {
-      target += params;
-    }
-    
-    return new URL(base, target);
-  }
 
   /**
    * This method finds all anchors below the supplied DOM
@@ -400,8 +374,7 @@
             if (target != null && !noFollow && !post)
               try {
                 
-                URL url = (base.toString().indexOf(';') > 0) ? 
-                  fixEmbeddedParams(base, target) :  new URL(base, target);
+                URL url = URLUtil.resolveURL(base, target);
                 outlinks.add(new Outlink(url.toString(),
                                          linkText.toString().trim()));
               } catch (MalformedURLException e) {
