Index: conf/regex-normalize.xml.template
===================================================================
--- conf/regex-normalize.xml.template (revision 1359909)
+++ conf/regex-normalize.xml.template (working copy)
@@ -29,7 +29,7 @@
- ([;_]?((?i)l|j|bv_)?((?i)sid|phpsessid|sessionid)=.*?)(\?|&|#|$)
+ (;?\b_?((?i)l|j|bv_)?((?i)sid|phpsessid|sessionid)=.*?)(\?|&|#|$)
$4
Index: src/plugin/urlnormalizer-regex/sample/regex-normalize-default.xml
===================================================================
--- src/plugin/urlnormalizer-regex/sample/regex-normalize-default.xml (revision 1359909)
+++ src/plugin/urlnormalizer-regex/sample/regex-normalize-default.xml (working copy)
@@ -13,7 +13,7 @@
- ([;_]?((?i)l|j|bv_)?((?i)sid|phpsessid|sessionid)=.*?)(\?|&|#|$)
+ (;?\b_?((?i)l|j|bv_)?((?i)sid|phpsessid|sessionid)=.*?)(\?|&|#|$)
$4
Index: src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test
===================================================================
--- src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test (revision 1359909)
+++ src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test (working copy)
@@ -11,6 +11,8 @@
http://www.foo.com/foo.html?param=1&another=2;jsessionid=1E6FEC0D14D044541DD84D2D013D29ED http://www.foo.com/foo.html?param=1&another=2
http://www.foo.com/foo.html;jsessionid=1E6FEC0D14D044541DD84D2D013D29ED?param=1&another=2 http://www.foo.com/foo.html?param=1&another=2
http://www.foo.com/foo.php?&x=1&sid=xyz&something=1 http://www.foo.com/foo.php?x=1&something=1
+# but NewsID is not a session id and must be left unchanged (NUTCH-706, NUTCH-1328)
+http://www.foo.com/fa/newsdetail.aspx?NewsID=1567539 http://www.foo.com/fa/newsdetail.aspx?NewsID=1567539
# test removal default pages
http://www.foo.com/home/index.html http://www.foo.com/home/