Index: eclipse-codeformat.xml
===================================================================
--- eclipse-codeformat.xml	(revision 0)
+++ eclipse-codeformat.xml	(revision 0)
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="Nutch" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
Index: src/test/org/apache/nutch/fetcher/TestFetcher.java
===================================================================
--- src/test/org/apache/nutch/fetcher/TestFetcher.java	(revision 1188252)
+++ src/test/org/apache/nutch/fetcher/TestFetcher.java	(working copy)
@@ -37,18 +37,15 @@
 import junit.framework.TestCase;
 
 /**
- * Basic fetcher test
- * 1. generate seedlist
- * 2. inject
- * 3. generate
- * 3. fetch
- * 4. Verify contents
+ * Basic fetcher test 1. generate seedlist 2. inject 3. generate 4. fetch 5.
+ * Verify contents
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
- *
+ * 
  */
 public class TestFetcher extends TestCase {
 
-  final static Path testdir=new Path("build/test/fetch-test");
+  final static Path testdir = new Path("build/test/fetch-test");
   Configuration conf;
   FileSystem fs;
   Path crawldbPath;
@@ -56,109 +53,115 @@
   Path urlPath;
   Server server;
 
-  protected void setUp() throws Exception{
-    conf=CrawlDBTestUtil.createConfiguration();
-    fs=FileSystem.get(conf);
+  protected void setUp() throws Exception {
+    conf = CrawlDBTestUtil.createConfiguration();
+    fs = FileSystem.get(conf);
     fs.delete(testdir, true);
-    urlPath=new Path(testdir,"urls");
-    crawldbPath=new Path(testdir,"crawldb");
-    segmentsPath=new Path(testdir,"segments");
-    server=CrawlDBTestUtil.getServer(conf.getInt("content.server.port",50000), "build/test/data/fetch-test-site");
+    urlPath = new Path(testdir, "urls");
+    crawldbPath = new Path(testdir, "crawldb");
+    segmentsPath = new Path(testdir, "segments");
+    server = CrawlDBTestUtil.getServer(
+        conf.getInt("content.server.port", 50000),
+        "build/test/data/fetch-test-site");
     server.start();
   }
 
-  protected void tearDown() throws Exception{
+  protected void tearDown() throws Exception {
     server.stop();
     fs.delete(testdir, true);
   }
-  
+
   public void testFetch() throws IOException {
-    
-    //generate seedlist
-    ArrayList<String> urls=new ArrayList<String>();
-    
-    addUrl(urls,"index.html");
-    addUrl(urls,"pagea.html");
-    addUrl(urls,"pageb.html");
-    addUrl(urls,"dup_of_pagea.html");
-    addUrl(urls,"nested_spider_trap.html");
-    addUrl(urls,"exception.html");
-    
+
+    // generate seedlist
+    ArrayList<String> urls = new ArrayList<String>();
+
+    addUrl(urls, "index.html");
+    addUrl(urls, "pagea.html");
+    addUrl(urls, "pageb.html");
+    addUrl(urls, "dup_of_pagea.html");
+    addUrl(urls, "nested_spider_trap.html");
+    addUrl(urls, "exception.html");
+
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls);
-    
-    //inject
-    Injector injector=new Injector(conf);
+
+    // inject
+    Injector injector = new Injector(conf);
     injector.inject(crawldbPath, urlPath);
 
-    //generate
-    Generator g=new Generator(conf);
+    // generate
+    Generator g = new Generator(conf);
     Path[] generatedSegment = g.generate(crawldbPath, segmentsPath, 1,
         Long.MAX_VALUE, Long.MAX_VALUE, false, false);
 
-    long time=System.currentTimeMillis();
-    //fetch
-    Fetcher fetcher=new Fetcher(conf);
+    long time = System.currentTimeMillis();
+    // fetch
+    Fetcher fetcher = new Fetcher(conf);
 
     // Set fetcher.parse to true
     conf.setBoolean("fetcher.parse", true);
 
     fetcher.fetch(generatedSegment[0], 1);
 
-    time=System.currentTimeMillis()-time;
-    
-    //verify politeness, time taken should be more than (num_of_pages +1)*delay
-    int minimumTime=(int) ((urls.size()+1)*1000*conf.getFloat("fetcher.server.delay",5));
+    time = System.currentTimeMillis() - time;
+
+    // verify politeness, time taken should be more than (num_of_pages +1)*delay
+    int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat(
+        "fetcher.server.delay", 5));
     assertTrue(time > minimumTime);
-    
-    //verify content
-    Path content=new Path(new Path(generatedSegment[0], Content.DIR_NAME),"part-00000/data");
-    SequenceFile.Reader reader=new SequenceFile.Reader(fs, content, conf);
-    
-    ArrayList<String> handledurls=new ArrayList<String>();
-    
-    READ_CONTENT:
-      do {
-      Text key=new Text();
-      Content value=new Content();
-      if(!reader.next(key, value)) break READ_CONTENT;
-      String contentString=new String(value.getContent());
-      if(contentString.indexOf("Nutch fetcher test page")!=-1) { 
+
+    // verify content
+    Path content = new Path(new Path(generatedSegment[0], Content.DIR_NAME),
+        "part-00000/data");
+    SequenceFile.Reader reader = new SequenceFile.Reader(fs, content, conf);
+
+    ArrayList<String> handledurls = new ArrayList<String>();
+
+    READ_CONTENT: do {
+      Text key = new Text();
+      Content value = new Content();
+      if (!reader.next(key, value))
+        break READ_CONTENT;
+      String contentString = new String(value.getContent());
+      if (contentString.indexOf("Nutch fetcher test page") != -1) {
         handledurls.add(key.toString());
       }
-    } while(true);
+    } while (true);
 
     reader.close();
 
     Collections.sort(urls);
     Collections.sort(handledurls);
 
-    //verify that enough pages were handled
+    // verify that enough pages were handled
     assertEquals(urls.size(), handledurls.size());
 
-    //verify that correct pages were handled
+    // verify that correct pages were handled
     assertTrue(handledurls.containsAll(urls));
     assertTrue(urls.containsAll(handledurls));
-    
+
     handledurls.clear();
 
-    //verify parse data
-    Path parseData = new Path(new Path(generatedSegment[0], ParseData.DIR_NAME),"part-00000/data");
+    // verify parse data
+    Path parseData = new Path(
+        new Path(generatedSegment[0], ParseData.DIR_NAME), "part-00000/data");
     reader = new SequenceFile.Reader(fs, parseData, conf);
-    
-    READ_PARSE_DATA:
-      do {
+
+    READ_PARSE_DATA: do {
       Text key = new Text();
       ParseData value = new ParseData();
-      if(!reader.next(key, value)) break READ_PARSE_DATA;
-      // make sure they all contain "nutch.segment.name" and "nutch.content.digest" 
+      if (!reader.next(key, value))
+        break READ_PARSE_DATA;
+      // make sure they all contain "nutch.segment.name" and
+      // "nutch.content.digest"
       // keys in parse metadata
       Metadata contentMeta = value.getContentMeta();
-      if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null 
-            && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
+      if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null
+          && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
         handledurls.add(key.toString());
       }
-    } while(true);
-    
+    } while (true);
+
     Collections.sort(handledurls);
 
     assertEquals(urls.size(), handledurls.size());
@@ -168,9 +171,10 @@
   }
 
   private void addUrl(ArrayList<String> urls, String page) {
-    urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + page);
+    urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/"
+        + page);
   }
-  
+
   public void testAgentNameCheck() {
 
     boolean failedNoAgentName = false;
Index: src/test/org/apache/nutch/metadata/TestMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestMetadata.java	(revision 1188252)
+++ src/test/org/apache/nutch/metadata/TestMetadata.java	(working copy)
@@ -45,7 +45,7 @@
   public static void main(String[] args) {
     TestRunner.run(suite());
   }
-  
+
   /**
    * Test to ensure that only non-null values get written when the
    * {@link Metadata} object is written using a Writeable.
@@ -282,4 +282,3 @@
   }
 
 }
-
Index: src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java	(revision 1188252)
+++ src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java	(working copy)
@@ -30,7 +30,7 @@
 /**
  * JUnit based tests of class
  * {@link org.apache.nutch.metadata.SpellCheckedMetadata}.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
@@ -52,20 +52,20 @@
 
   /** Test for the <code>getNormalizedName(String)</code> method. */
   public void testGetNormalizedName() {
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("Content-Type"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("ContentType"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("Content-type"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contenttype"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contentype"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contntype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("Content-Type"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("ContentType"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("Content-type"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contenttype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contentype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contntype"));
   }
-  
+
   /** Test for the <code>add(String, String)</code> method. */
   public void testAdd() {
     String[] values = null;
@@ -256,8 +256,8 @@
   }
 
   /**
-   * IO Test method, usable only when you plan to do changes in metadata
-   * to measure relative performance impact.
+   * IO Test method, usable only when you plan to do changes in metadata to
+   * measure relative performance impact.
    */
   public final void testHandlingSpeed() {
     SpellCheckedMetadata result;
Index: src/test/org/apache/nutch/protocol/TestProtocolFactory.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestProtocolFactory.java	(revision 1188252)
+++ src/test/org/apache/nutch/protocol/TestProtocolFactory.java	(working copy)
@@ -26,55 +26,56 @@
 
   Configuration conf;
   ProtocolFactory factory;
-  
+
   protected void setUp() throws Exception {
     conf = NutchConfiguration.create();
     conf.set("plugin.includes", ".*");
     conf.set("http.agent.name", "test-bot");
-    factory=new ProtocolFactory(conf);
+    factory = new ProtocolFactory(conf);
   }
 
-  public void testGetProtocol(){
+  public void testGetProtocol() {
 
-    //non existing protocol
+    // non existing protocol
     try {
       factory.getProtocol("xyzxyz://somehost");
       fail("Must throw ProtocolNotFound");
     } catch (ProtocolNotFound e) {
-      //all is ok
-    } catch (Exception ex){
+      // all is ok
+    } catch (Exception ex) {
       fail("Must not throw any other exception");
     }
-    
-    Protocol httpProtocol=null;
-    
-    //existing protocol
+
+    Protocol httpProtocol = null;
+
+    // existing protocol
     try {
-      httpProtocol=factory.getProtocol("http://somehost");
+      httpProtocol = factory.getProtocol("http://somehost");
       assertNotNull(httpProtocol);
-    } catch (Exception ex){
+    } catch (Exception ex) {
       fail("Must not throw any other exception");
     }
 
-    //cache key
-    Object protocol = ObjectCache.get(conf).getObject(Protocol.X_POINT_ID + "http");
+    // cache key
+    Object protocol = ObjectCache.get(conf).getObject(
+        Protocol.X_POINT_ID + "http");
     assertNotNull(protocol);
     assertEquals(httpProtocol, protocol);
-    
-    //test same object instance
+
+    // test same object instance
     try {
-      assertTrue(httpProtocol==factory.getProtocol("http://somehost"));
+      assertTrue(httpProtocol == factory.getProtocol("http://somehost"));
     } catch (ProtocolNotFound e) {
       fail("Must not throw any exception");
     }
   }
-  
-  public void testContains(){
+
+  public void testContains() {
     assertTrue(factory.contains("http", "http"));
     assertTrue(factory.contains("http", "http,ftp"));
     assertTrue(factory.contains("http", "   http ,   ftp"));
     assertTrue(factory.contains("smb", "ftp,smb,http"));
     assertFalse(factory.contains("smb", "smbb"));
   }
-  
+
 }
Index: src/test/org/apache/nutch/protocol/TestContent.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContent.java	(revision 1188252)
+++ src/test/org/apache/nutch/protocol/TestContent.java	(working copy)
@@ -26,14 +26,15 @@
 
 import junit.framework.TestCase;
 
-
 /** Unit tests for Content. */
 
 public class TestContent extends TestCase {
 
   private static Configuration conf = NutchConfiguration.create();
 
-  public TestContent(String name) { super(name); }
+  public TestContent(String name) {
+    super(name);
+  }
 
   public void testContent() throws Exception {
 
@@ -46,7 +47,7 @@
     metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
-                            metaData, conf);
+        metaData, conf);
 
     WritableTestUtils.testWritable(r);
     assertEquals("text/html", r.getMetadata().get("Content-Type"));
@@ -59,52 +60,36 @@
     Content c = null;
     Metadata p = new Metadata();
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "text/html; charset=UTF-8", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), "text/html; charset=UTF-8", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "".getBytes("UTF8"), "", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    null, p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "".getBytes("UTF8"), null, p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "text/plain", p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.png",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "text/plain", p, conf);
+    c = new Content("http://www.foo.com/foo.png", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), "", p, conf);
     assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    null, p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), null, p, conf);
     assertNotNull(c.getContentType());
   }
 
Index: src/test/org/apache/nutch/segment/TestSegmentMerger.java
===================================================================
--- src/test/org/apache/nutch/segment/TestSegmentMerger.java	(revision 1188252)
+++ src/test/org/apache/nutch/segment/TestSegmentMerger.java	(working copy)
@@ -38,12 +38,13 @@
   Path seg2;
   Path out;
   int countSeg1, countSeg2;
-  
+
   public void setUp() throws Exception {
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
     long blkSize = fs.getDefaultBlockSize();
-    testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis());
+    testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-"
+        + System.currentTimeMillis());
     seg1 = new Path(testDir, "seg1");
     seg2 = new Path(testDir, "seg2");
     out = new Path(testDir, "out");
@@ -52,7 +53,8 @@
     DecimalFormat df = new DecimalFormat("0000000");
     Text k = new Text();
     Path ptPath = new Path(new Path(seg1, ParseText.DIR_NAME), "part-00000");
-    MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
+    MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(),
+        Text.class, ParseText.class);
     long curSize = 0;
     countSeg1 = 0;
     while (curSize < blkSize * 2) {
@@ -65,7 +67,8 @@
     System.err.println(" - done: " + countSeg1 + " records.");
     System.err.println("Creating large segment 2...");
     ptPath = new Path(new Path(seg2, ParseText.DIR_NAME), "part-00000");
-    w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
+    w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class,
+        ParseText.class);
     curSize = 0;
     countSeg2 = 0;
     while (curSize < blkSize * 2) {
@@ -77,14 +80,14 @@
     w.close();
     System.err.println(" - done: " + countSeg2 + " records.");
   }
-  
+
   public void tearDown() throws Exception {
     fs.delete(testDir, true);
   }
-  
+
   public void testLargeMerge() throws Exception {
     SegmentMerger merger = new SegmentMerger(conf);
-    merger.merge(out, new Path[]{seg1, seg2}, false, false, -1);
+    merger.merge(out, new Path[] { seg1, seg2 }, false, false, -1);
     // verify output
     FileStatus[] stats = fs.listStatus(out);
     // there should be just one path
@@ -92,7 +95,8 @@
     Path outSeg = stats[0].getPath();
     Text k = new Text();
     ParseText v = new ParseText();
-    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(outSeg, ParseText.DIR_NAME), conf);
+    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
+        outSeg, ParseText.DIR_NAME), conf);
     int cnt1 = 0, cnt2 = 0;
     for (MapFile.Reader r : readers) {
       while (r.next(k, v)) {
@@ -111,5 +115,4 @@
     assertEquals(countSeg1, cnt1);
     assertEquals(countSeg2, cnt2);
   }
-
 }
Index: src/test/org/apache/nutch/net/TestURLNormalizers.java
===================================================================
--- src/test/org/apache/nutch/net/TestURLNormalizers.java	(revision 1188252)
+++ src/test/org/apache/nutch/net/TestURLNormalizers.java	(working copy)
@@ -30,30 +30,38 @@
     String clazz1 = "org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer";
     String clazz2 = "org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer";
     conf.set("urlnormalizer.order", clazz1 + " " + clazz2);
-    
-    URLNormalizers normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT);
-    
+
+    URLNormalizers normalizers = new URLNormalizers(conf,
+        URLNormalizers.SCOPE_DEFAULT);
+
     assertNotNull(normalizers);
     try {
-      normalizers.normalize("http://www.example.com/", URLNormalizers.SCOPE_DEFAULT);
+      normalizers.normalize("http://www.example.com/",
+          URLNormalizers.SCOPE_DEFAULT);
     } catch (MalformedURLException mue) {
       fail(mue.toString());
     }
 
     // NUTCH-1011 - Get rid of superfluous slashes
     try {
-      String normalizedSlashes = normalizers.normalize("http://www.example.org//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT);
-      assertEquals(normalizedSlashes, "http://www.example.org/path/to/somewhere.html");
+      String normalizedSlashes = normalizers.normalize(
+          "http://www.example.org//path/to//somewhere.html",
+          URLNormalizers.SCOPE_DEFAULT);
+      assertEquals(normalizedSlashes,
+          "http://www.example.org/path/to/somewhere.html");
     } catch (MalformedURLException mue) {
       fail(mue.toString());
     }
 
     // check the order
     int pos1 = -1, pos2 = -1;
-    URLNormalizer[] impls = normalizers.getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
+    URLNormalizer[] impls = normalizers
+        .getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
     for (int i = 0; i < impls.length; i++) {
-      if (impls[i].getClass().getName().equals(clazz1)) pos1 = i;
-      if (impls[i].getClass().getName().equals(clazz2)) pos2 = i;
+      if (impls[i].getClass().getName().equals(clazz1))
+        pos1 = i;
+      if (impls[i].getClass().getName().equals(clazz2))
+        pos2 = i;
     }
     if (pos1 != -1 && pos2 != -1) {
       assertTrue("RegexURLNormalizer before BasicURLNormalizer", pos1 < pos2);
Index: src/test/org/apache/nutch/net/TestURLFilters.java
===================================================================
--- src/test/org/apache/nutch/net/TestURLFilters.java	(revision 1188252)
+++ src/test/org/apache/nutch/net/TestURLFilters.java	(working copy)
@@ -25,6 +25,7 @@
 
   /**
    * Testcase for NUTCH-325.
+   * 
    * @throws URLFilterException
    */
   public void testNonExistingUrlFilter() throws URLFilterException {
Index: src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java	(working copy)
@@ -32,18 +32,15 @@
 import junit.framework.TestCase;
 
 public class TestCrawlDbMerger extends TestCase {
-  private static final Logger LOG = Logger.getLogger(CrawlDbMerger.class.getName());
-  
+  private static final Logger LOG = Logger.getLogger(CrawlDbMerger.class
+      .getName());
+
   String url10 = "http://example.com/";
   String url11 = "http://example.com/foo";
   String url20 = "http://example.com/";
   String url21 = "http://example.com/bar";
-  String[] urls_expected = new String[] {
-          url10,
-          url11,
-          url21
-  };
-  
+  String[] urls_expected = new String[] { url10, url11, url21 };
+
   TreeSet init1 = new TreeSet();
   TreeSet init2 = new TreeSet();
   HashMap expected = new HashMap();
@@ -52,7 +49,7 @@
   FileSystem fs;
   Path testDir;
   CrawlDbReader reader;
-  
+
   public void setUp() throws Exception {
     init1.add(url10);
     init1.add(url11);
@@ -78,19 +75,20 @@
     expected.put(url21, cd2);
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
-    testDir = new Path("test-crawldb-" +
-            new java.util.Random().nextInt());
+    testDir = new Path("test-crawldb-" + new java.util.Random().nextInt());
     fs.mkdirs(testDir);
   }
-  
+
   public void tearDown() {
     try {
       if (fs.exists(testDir))
         fs.delete(testDir);
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
     try {
       reader.close();
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
   }
 
   public void testMerge() throws Exception {
@@ -101,15 +99,15 @@
     createCrawlDb(conf, fs, crawldb2, init2, cd2);
     CrawlDbMerger merger = new CrawlDbMerger(conf);
     LOG.fine("* merging crawldbs to " + output);
-    merger.merge(output, new Path[]{crawldb1, crawldb2}, false, false);
+    merger.merge(output, new Path[] { crawldb1, crawldb2 }, false, false);
     LOG.fine("* reading crawldb: " + output);
     reader = new CrawlDbReader();
     String crawlDb = output.toString();
     Iterator it = expected.keySet().iterator();
     while (it.hasNext()) {
-      String url = (String)it.next();
+      String url = (String) it.next();
       LOG.fine("url=" + url);
-      CrawlDatum cd = (CrawlDatum)expected.get(url);
+      CrawlDatum cd = (CrawlDatum) expected.get(url);
       CrawlDatum res = reader.get(crawlDb, url, conf);
       LOG.fine(" -> " + res);
       System.out.println("url=" + url);
@@ -122,14 +120,16 @@
     reader.close();
     fs.delete(testDir);
   }
-  
-  private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet init, CrawlDatum cd) throws Exception {
+
+  private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb,
+      TreeSet init, CrawlDatum cd) throws Exception {
     LOG.fine("* creating crawldb: " + crawldb);
     Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
-    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, CrawlDatum.class);
+    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir,
+        "part-00000").toString(), Text.class, CrawlDatum.class);
     Iterator it = init.iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       writer.append(new Text(key), cd);
     }
     writer.close();
Index: src/test/org/apache/nutch/crawl/DummyWritable.java
===================================================================
--- src/test/org/apache/nutch/crawl/DummyWritable.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/DummyWritable.java	(working copy)
@@ -21,12 +21,12 @@
 
 public class DummyWritable extends IntWritable {
 
-    public DummyWritable() {
+  public DummyWritable() {
 
-    }
+  }
 
-    public DummyWritable(int i) {
-        super(i);
-    }
+  public DummyWritable(int i) {
+    super(i);
+  }
 
 }
Index: src/test/org/apache/nutch/crawl/TestLinkDbMerger.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestLinkDbMerger.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestLinkDbMerger.java	(working copy)
@@ -33,41 +33,28 @@
 import junit.framework.TestCase;
 
 public class TestLinkDbMerger extends TestCase {
-  private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class.getName());
-  
+  private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class
+      .getName());
+
   String url10 = "http://example.com/foo";
-  String[] urls10 = new String[] {
-          "http://example.com/100",
-          "http://example.com/101"
-        };
+  String[] urls10 = new String[] { "http://example.com/100",
+      "http://example.com/101" };
 
   String url11 = "http://example.com/";
-  String[] urls11 = new String[] {
-          "http://example.com/110",
-          "http://example.com/111"
-        };
-  
+  String[] urls11 = new String[] { "http://example.com/110",
+      "http://example.com/111" };
+
   String url20 = "http://example.com/";
-  String[] urls20 = new String[] {
-          "http://foo.com/200",
-          "http://foo.com/201"
-        };
+  String[] urls20 = new String[] { "http://foo.com/200", "http://foo.com/201" };
   String url21 = "http://example.com/bar";
-  String[] urls21 = new String[] {
-          "http://foo.com/210",
-          "http://foo.com/211"
-        };
-  
+  String[] urls21 = new String[] { "http://foo.com/210", "http://foo.com/211" };
+
   String[] urls10_expected = urls10;
-  String[] urls11_expected = new String[] {
-          urls11[0],
-          urls11[1],
-          urls20[0],
-          urls20[1]
-  };
+  String[] urls11_expected = new String[] { urls11[0], urls11[1], urls20[0],
+      urls20[1] };
   String[] urls20_expected = urls11_expected;
   String[] urls21_expected = urls21;
-  
+
   TreeMap init1 = new TreeMap();
   TreeMap init2 = new TreeMap();
   HashMap expected = new HashMap();
@@ -75,7 +62,7 @@
   Path testDir;
   FileSystem fs;
   LinkDbReader reader;
-  
+
   public void setUp() throws Exception {
     init1.put(url10, urls10);
     init1.put(url11, urls11);
@@ -87,19 +74,21 @@
     expected.put(url21, urls21_expected);
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
-    testDir = new Path("build/test/test-linkdb-" +
-            new java.util.Random().nextInt());
+    testDir = new Path("build/test/test-linkdb-"
+        + new java.util.Random().nextInt());
     fs.mkdirs(testDir);
   }
-  
+
   public void tearDown() {
     try {
       if (fs.exists(testDir))
         fs.delete(testDir, true);
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
     try {
       reader.close();
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
   }
 
   public void testMerge() throws Exception {
@@ -113,21 +102,21 @@
     createLinkDb(conf, fs, linkdb2, init2);
     LinkDbMerger merger = new LinkDbMerger(conf);
     LOG.fine("* merging linkdbs to " + output);
-    merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false);
+    merger.merge(output, new Path[] { linkdb1, linkdb2 }, false, false);
     LOG.fine("* reading linkdb: " + output);
     reader = new LinkDbReader(conf, output);
     Iterator it = expected.keySet().iterator();
     while (it.hasNext()) {
-      String url = (String)it.next();
+      String url = (String) it.next();
       LOG.fine("url=" + url);
-      String[] vals = (String[])expected.get(url);
+      String[] vals = (String[]) expected.get(url);
       Inlinks inlinks = reader.getInlinks(new Text(url));
       // may not be null
       assertNotNull(inlinks);
       ArrayList links = new ArrayList();
       Iterator it2 = inlinks.iterator();
       while (it2.hasNext()) {
-        Inlink in = (Inlink)it2.next();
+        Inlink in = (Inlink) it2.next();
         links.add(in.getFromUrl());
       }
       for (int i = 0; i < vals.length; i++) {
@@ -138,16 +127,18 @@
     reader.close();
     fs.delete(testDir, true);
   }
-  
-  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap init) throws Exception {
+
+  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb,
+      TreeMap init) throws Exception {
     LOG.fine("* creating linkdb: " + linkdb);
     Path dir = new Path(linkdb, LinkDb.CURRENT_NAME);
-    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class);
+    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir,
+        "part-00000").toString(), Text.class, Inlinks.class);
     Iterator it = init.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       Inlinks inlinks = new Inlinks();
-      String[] vals = (String[])init.get(key);
+      String[] vals = (String[]) init.get(key);
       for (int i = 0; i < vals.length; i++) {
         Inlink in = new Inlink(vals[i], vals[i]);
         inlinks.add(in);
Index: src/test/org/apache/nutch/crawl/TestGenerator.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestGenerator.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestGenerator.java	(working copy)
@@ -34,9 +34,9 @@
  * Basic generator test. 1. Insert entries in crawldb 2. Generates entries to
  * fetch 3. Verifies that number of generated urls match 4. Verifies that
  * highest scoring urls are generated
- *
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
- *
+ * 
  */
 public class TestGenerator extends TestCase {
 
@@ -69,7 +69,7 @@
 
   /**
   * Test that generator generates fetchlist ordered by score (desc).
-   *
+   * 
    * @throws Exception
    */
   public void testGenerateHighest() throws Exception {
@@ -79,8 +79,7 @@
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
     for (int i = 0; i <= 100; i++) {
-      list.add(createURLCrawlDatum("http://aaa/" + pad(i),
-          1, i));
+      list.add(createURLCrawlDatum("http://aaa/" + pad(i), 1, i));
     }
 
     createCrawlDB(list);
@@ -91,7 +90,7 @@
         CrawlDatum.GENERATE_DIR_NAME), "part-00000");
 
     ArrayList<URLCrawlDatum> l = readContents(fetchlist);
-    
+
     // sort urls by score desc
     Collections.sort(l, new ScoreComparator());
 
@@ -129,17 +128,15 @@
 
   /**
    * Test that generator obeys the property "generate.max.per.host".
-   * @throws Exception 
+   * 
+   * @throws Exception
    */
-  public void testGenerateHostLimit() throws Exception{
+  public void testGenerateHostLimit() throws Exception {
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
-    list.add(createURLCrawlDatum("http://www.example.com/index1.html",
-        1, 1));
-    list.add(createURLCrawlDatum("http://www.example.com/index2.html",
-        1, 1));
-    list.add(createURLCrawlDatum("http://www.example.com/index3.html",
-        1, 1));
+    list.add(createURLCrawlDatum("http://www.example.com/index1.html", 1, 1));
+    list.add(createURLCrawlDatum("http://www.example.com/index2.html", 1, 1));
+    list.add(createURLCrawlDatum("http://www.example.com/index3.html", 1, 1));
 
     createCrawlDB(list);
 
@@ -186,9 +183,10 @@
   /**
    * Test that generator obeys the property "generator.max.count" and
    * "generator.count.per.domain".
-   * @throws Exception 
+   * 
+   * @throws Exception
    */
-  public void testGenerateDomainLimit() throws Exception{
+  public void testGenerateDomainLimit() throws Exception {
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
     list.add(createURLCrawlDatum("http://a.example.com/index.html", 1, 1));
@@ -199,7 +197,8 @@
 
     Configuration myConfiguration = new Configuration(conf);
     myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 1);
-    myConfiguration.set(Generator.GENERATOR_COUNT_MODE, Generator.GENERATOR_COUNT_VALUE_DOMAIN);
+    myConfiguration.set(Generator.GENERATOR_COUNT_MODE,
+        Generator.GENERATOR_COUNT_VALUE_DOMAIN);
 
     Path generatedSegment = generateFetchlist(Integer.MAX_VALUE,
         myConfiguration, false);
@@ -214,7 +213,8 @@
 
     myConfiguration = new Configuration(myConfiguration);
     myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 2);
-    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
+    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
+        false);
 
     fetchlistPath = new Path(new Path(generatedSegment,
         CrawlDatum.GENERATE_DIR_NAME), "part-00000");
@@ -240,10 +240,11 @@
 
   /**
    * Test generator obeys the filter setting.
-   * @throws Exception 
-   * @throws IOException 
+   * 
+   * @throws Exception
+   * @throws IOException
    */
-  public void testFilter() throws IOException, Exception{
+  public void testFilter() throws IOException, Exception {
 
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
@@ -261,7 +262,8 @@
 
     assertNull("should be null (0 entries)", generatedSegment);
 
-    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
+    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
+        false);
 
     Path fetchlistPath = new Path(new Path(generatedSegment,
         CrawlDatum.GENERATE_DIR_NAME), "part-00000");
@@ -273,14 +275,16 @@
 
   }
 
-
   /**
    * Read contents of fetchlist.
-   * @param fetchlist  path to Generated fetchlist
+   * 
+   * @param fetchlist
+   *          path to Generated fetchlist
    * @return Generated {@link URLCrawlDatum} objects
    * @throws IOException
    */
-  private ArrayList<URLCrawlDatum> readContents(Path fetchlist) throws IOException {
+  private ArrayList<URLCrawlDatum> readContents(Path fetchlist)
+      throws IOException {
     // verify results
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, fetchlist, conf);
 
@@ -301,8 +305,11 @@
 
   /**
    * Generate Fetchlist.
-   * @param numResults number of results to generate
-   * @param config Configuration to use
+   * 
+   * @param numResults
+   *          number of results to generate
+   * @param config
+   *          Configuration to use
    * @return path to generated segment
    * @throws IOException
    */
@@ -312,14 +319,16 @@
     Generator g = new Generator(config);
     Path[] generatedSegment = g.generate(dbDir, segmentsDir, -1, numResults,
         Long.MAX_VALUE, filter, false);
-    if (generatedSegment==null) return null;
+    if (generatedSegment == null)
+      return null;
     return generatedSegment[0];
   }
 
   /**
    * Creates CrawlDB.
-   *
-   * @param list database contents
+   * 
+   * @param list
+   *          database contents
    * @throws IOException
    * @throws Exception
    */
@@ -336,9 +345,13 @@
 
   /**
    * Constructs new {@link URLCrawlDatum} from submitted parameters.
-   * @param url url to use
-   * @param fetchInterval {@link CrawlDatum#setFetchInterval(float)}
-   * @param score {@link CrawlDatum#setScore(float)}
+   * 
+   * @param url
+   *          url to use
+   * @param fetchInterval
+   *          {@link CrawlDatum#setFetchInterval(float)}
+   * @param score
+   *          {@link CrawlDatum#setScore(float)}
    * @return Constructed object
    */
   private URLCrawlDatum createURLCrawlDatum(final String url,
Index: src/test/org/apache/nutch/crawl/TestSignatureFactory.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestSignatureFactory.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestSignatureFactory.java	(working copy)
@@ -24,9 +24,9 @@
 public class TestSignatureFactory extends TestCase {
 
   public void testGetSignature() {
-    Configuration conf=NutchConfiguration.create();
-    Signature signature1=SignatureFactory.getSignature(conf);
-    Signature signature2=SignatureFactory.getSignature(conf);
+    Configuration conf = NutchConfiguration.create();
+    Signature signature1 = SignatureFactory.getSignature(conf);
+    Signature signature2 = SignatureFactory.getSignature(conf);
     assertNotNull(signature1);
     assertNotNull(signature2);
     assertEquals(signature1, signature2);
Index: src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java
===================================================================
--- src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java	(working copy)
@@ -35,10 +35,10 @@
 import org.mortbay.jetty.handler.ContextHandler;
 import org.mortbay.jetty.handler.ResourceHandler;
 
-
 public class CrawlDBTestUtil {
 
-  private static final Logger LOG = LoggerFactory.getLogger(CrawlDBTestUtil.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDBTestUtil.class);
 
   /**
    * Creates synthetic crawldb
@@ -51,12 +51,12 @@
    *          urls to be inserted, objects are of type URLCrawlDatum
    * @throws Exception
    */
-  public static void createCrawlDb(Configuration conf, FileSystem fs, Path crawldb, List<URLCrawlDatum> init)
-      throws Exception {
+  public static void createCrawlDb(Configuration conf, FileSystem fs,
+      Path crawldb, List<URLCrawlDatum> init) throws Exception {
     LOG.trace("* creating crawldb: " + crawldb);
     Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
-    MapFile.Writer writer = new MapFile.Writer(conf, fs, new Path(dir, "part-00000")
-        .toString(), Text.class, CrawlDatum.class);
+    MapFile.Writer writer = new MapFile.Writer(conf, fs, new Path(dir,
+        "part-00000").toString(), Text.class, CrawlDatum.class);
     Iterator<URLCrawlDatum> it = init.iterator();
     while (it.hasNext()) {
       URLCrawlDatum row = it.next();
@@ -68,24 +68,24 @@
 
   /**
    * For now we need to manually construct our Configuration, because we need to
-   * override the default one and it is currently not possible to use dynamically
-   * set values.
+   * override the default one and it is currently not possible to use
+   * dynamically set values.
    * 
    * @return
    * @deprecated Use {@link #createConfiguration()} instead
    */
-  public static Configuration create(){
+  public static Configuration create() {
     return createConfiguration();
   }
 
   /**
    * For now we need to manually construct our Configuration, because we need to
-   * override the default one and it is currently not possible to use dynamically
-   * set values.
+   * override the default one and it is currently not possible to use
+   * dynamically set values.
    * 
    * @return
    */
-  public static Configuration createConfiguration(){
+  public static Configuration createConfiguration() {
     Configuration conf = new Configuration();
     conf.addResource("nutch-default.xml");
     conf.addResource("crawl-tests.xml");
@@ -103,34 +103,39 @@
       this.datum = datum;
     }
   }
-  
+
   /**
    * Generate seedlist
-   * @throws IOException 
+   * 
+   * @throws IOException
    */
-  public static void generateSeedList(FileSystem fs, Path urlPath, List<String> contents) throws IOException{
+  public static void generateSeedList(FileSystem fs, Path urlPath,
+      List<String> contents) throws IOException {
     FSDataOutputStream out;
-    Path file=new Path(urlPath,"urls.txt");
+    Path file = new Path(urlPath, "urls.txt");
     fs.mkdirs(urlPath);
-    out=fs.create(file);
-    Iterator<String> iterator=contents.iterator();
-    while(iterator.hasNext()){
-      String url=iterator.next();
+    out = fs.create(file);
+    Iterator<String> iterator = contents.iterator();
+    while (iterator.hasNext()) {
+      String url = iterator.next();
       out.writeBytes(url);
       out.writeBytes("\n");
     }
     out.flush();
     out.close();
   }
-  
+
   /**
    * Creates a new JettyServer with one static root context
    * 
-   * @param port port to listen to
-   * @param staticContent folder where static content lives
-   * @throws UnknownHostException 
+   * @param port
+   *          port to listen to
+   * @param staticContent
+   *          folder where static content lives
+   * @throws UnknownHostException
    */
-  public static Server getServer(int port, String staticContent) throws UnknownHostException{
+  public static Server getServer(int port, String staticContent)
+      throws UnknownHostException {
     Server webServer = new org.mortbay.jetty.Server();
     SocketConnector listener = new SocketConnector();
     listener.setPort(port);
Index: src/test/org/apache/nutch/crawl/TestInjector.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestInjector.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestInjector.java	(working copy)
@@ -30,12 +30,9 @@
 import junit.framework.TestCase;
 
 /**
- * Basic injector test:
- * 1. Creates a text file with urls
- * 2. Injects them into crawldb
- * 3. Reads crawldb entries and verifies contents
- * 4. Injects more urls into webdb
- * 5. Reads crawldb entries and verifies contents
+ * Basic injector test: 1. Creates a text file with urls 2. Injects them into
+ * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls
+ * into webdb 5. Reads crawldb entries and verifies contents
  * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
  */
@@ -43,80 +40,82 @@
 
   private Configuration conf;
   private FileSystem fs;
-  final static Path testdir=new Path("build/test/inject-test");
+  final static Path testdir = new Path("build/test/inject-test");
   Path crawldbPath;
   Path urlPath;
-  
+
   protected void setUp() throws Exception {
     conf = CrawlDBTestUtil.createConfiguration();
-    urlPath=new Path(testdir,"urls");
-    crawldbPath=new Path(testdir,"crawldb");
-    fs=FileSystem.get(conf);
-    if (fs.exists(urlPath)) fs.delete(urlPath, false);
-    if (fs.exists(crawldbPath)) fs.delete(crawldbPath, true);
+    urlPath = new Path(testdir, "urls");
+    crawldbPath = new Path(testdir, "crawldb");
+    fs = FileSystem.get(conf);
+    if (fs.exists(urlPath))
+      fs.delete(urlPath, false);
+    if (fs.exists(crawldbPath))
+      fs.delete(crawldbPath, true);
   }
-  
-  protected void tearDown() throws IOException{
+
+  protected void tearDown() throws IOException {
     fs.delete(testdir, true);
   }
 
   public void testInject() throws IOException {
-    ArrayList<String> urls=new ArrayList<String>();
-    for(int i=0;i<100;i++) {
+    ArrayList<String> urls = new ArrayList<String>();
+    for (int i = 0; i < 100; i++) {
       urls.add("http://zzz.com/" + i + ".html");
     }
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls);
-    
-    Injector injector=new Injector(conf);
+
+    Injector injector = new Injector(conf);
     injector.inject(crawldbPath, urlPath);
-    
+
     // verify results
-    List<String>read=readCrawldb();
-    
+    List<String> read = readCrawldb();
+
     Collections.sort(read);
     Collections.sort(urls);
 
     assertEquals(urls.size(), read.size());
-    
+
     assertTrue(read.containsAll(urls));
     assertTrue(urls.containsAll(read));
-    
-    //inject more urls
-    ArrayList<String> urls2=new ArrayList<String>();
-    for(int i=0;i<100;i++) {
+
+    // inject more urls
+    ArrayList<String> urls2 = new ArrayList<String>();
+    for (int i = 0; i < 100; i++) {
       urls2.add("http://xxx.com/" + i + ".html");
     }
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls2);
     injector.inject(crawldbPath, urlPath);
     urls.addAll(urls2);
-    
+
     // verify results
-    read=readCrawldb();
-    
+    read = readCrawldb();
 
     Collections.sort(read);
     Collections.sort(urls);
 
     assertEquals(urls.size(), read.size());
-    
+
     assertTrue(read.containsAll(urls));
     assertTrue(urls.containsAll(read));
-    
+
   }
-  
-  private List<String> readCrawldb() throws IOException{
-    Path dbfile=new Path(crawldbPath,CrawlDb.CURRENT_NAME + "/part-00000/data");
+
+  private List<String> readCrawldb() throws IOException {
+    Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME
+        + "/part-00000/data");
     System.out.println("reading:" + dbfile);
-    SequenceFile.Reader reader=new SequenceFile.Reader(fs, dbfile, conf);
-    ArrayList<String> read=new ArrayList<String>();
-    
-    READ:
-      do {
-      Text key=new Text();
-      CrawlDatum value=new CrawlDatum();
-      if(!reader.next(key, value)) break READ;
+    SequenceFile.Reader reader = new SequenceFile.Reader(fs, dbfile, conf);
+    ArrayList<String> read = new ArrayList<String>();
+
+    READ: do {
+      Text key = new Text();
+      CrawlDatum value = new CrawlDatum();
+      if (!reader.next(key, value))
+        break READ;
       read.add(key.toString());
-    } while(true);
+    } while (true);
 
     return read;
   }
Index: src/test/org/apache/nutch/parse/TestParseText.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParseText.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestParseText.java	(working copy)
@@ -24,15 +24,17 @@
 /** Unit tests for ParseText. */
 
 public class TestParseText extends TestCase {
-  public TestParseText(String name) { super(name); }
+  public TestParseText(String name) {
+    super(name);
+  }
 
   public void testParseText() throws Exception {
 
     String page = "Hello World The Quick Brown Fox Jumped Over the Lazy Fox";
 
     ParseText s = new ParseText(page);
-                        
+
     WritableTestUtils.testWritable(s);
   }
-	
+
 }
Index: src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
===================================================================
--- src/test/org/apache/nutch/parse/TestOutlinkExtractor.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestOutlinkExtractor.java	(working copy)
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.nutch.parse;
 
 import org.apache.nutch.parse.Outlink;
@@ -34,47 +34,57 @@
 public class TestOutlinkExtractor extends TestCase {
 
   private static Configuration conf = NutchConfiguration.create();
+
   public void testGetNoOutlinks() {
-    Outlink[]  outlinks = null;
-            
+    Outlink[] outlinks = null;
+
     outlinks = OutlinkExtractor.getOutlinks(null, conf);
     assertNotNull(outlinks);
     assertEquals(0, outlinks.length);
-    
+
     outlinks = OutlinkExtractor.getOutlinks("", conf);
     assertNotNull(outlinks);
     assertEquals(0, outlinks.length);
   }
-  
+
   public void testGetOutlinksHttp() {
-    Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with http://www.nutch.org/index.html is it found? " +
-        "What about www.google.com at http://www.google.de " +
-        "A longer URL could be http://www.sybit.com/solutions/portals.html", conf);
-    
+    Outlink[] outlinks = OutlinkExtractor
+        .getOutlinks(
+            "Test with http://www.nutch.org/index.html is it found? "
+                + "What about www.google.com at http://www.google.de "
+                + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+            conf);
+
     assertTrue("Url not found!", outlinks.length == 3);
-    assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+    assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+        outlinks[0].getToUrl());
     assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
-    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+        outlinks[2].getToUrl());
   }
-  
+
   public void testGetOutlinksHttp2() {
-    Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with http://www.nutch.org/index.html is it found? " +
-        "What about www.google.com at http://www.google.de " +
-        "A longer URL could be http://www.sybit.com/solutions/portals.html", "http://www.sybit.de", conf);
-    
+    Outlink[] outlinks = OutlinkExtractor
+        .getOutlinks(
+            "Test with http://www.nutch.org/index.html is it found? "
+                + "What about www.google.com at http://www.google.de "
+                + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+            "http://www.sybit.de", conf);
+
     assertTrue("Url not found!", outlinks.length == 3);
-    assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+    assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+        outlinks[0].getToUrl());
     assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
-    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+        outlinks[2].getToUrl());
   }
+
   public void testGetOutlinksFtp() {
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with ftp://www.nutch.org is it found? " +
-        "What about www.google.com at ftp://www.google.de", conf);
-    
-    assertTrue("Url not found!", outlinks.length >1);
+        "Test with ftp://www.nutch.org is it found? "
+            + "What about www.google.com at ftp://www.google.de", conf);
+
+    assertTrue("Url not found!", outlinks.length > 1);
     assertEquals("Wrong URL", "ftp://www.nutch.org", outlinks[0].getToUrl());
     assertEquals("Wrong URL", "ftp://www.google.de", outlinks[1].getToUrl());
   }
Index: src/test/org/apache/nutch/parse/TestParserFactory.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParserFactory.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestParserFactory.java	(working copy)
@@ -27,76 +27,80 @@
 
 /**
  * Unit test for new parse plugin selection.
- *
+ * 
  * @author Sebastien Le Callonnec
  * @version 1.0
  */
 public class TestParserFactory extends TestCase {
-	
+
   private Configuration conf;
   private ParserFactory parserFactory;
-    
-  public TestParserFactory(String name) { super(name); }
 
+  public TestParserFactory(String name) {
+    super(name);
+  }
+
   /** Inits the Test Case with the test parse-plugin file */
   protected void setUp() throws Exception {
-      conf = NutchConfiguration.create();
-      conf.set("plugin.includes", ".*");
-      conf.set("parse.plugin.file",
-               "org/apache/nutch/parse/parse-plugin-test.xml");
-      parserFactory = new ParserFactory(conf);
+    conf = NutchConfiguration.create();
+    conf.set("plugin.includes", ".*");
+    conf.set("parse.plugin.file",
+        "org/apache/nutch/parse/parse-plugin-test.xml");
+    parserFactory = new ParserFactory(conf);
   }
-    
+
   /** Unit test for <code>getExtensions(String)</code> method. */
   public void testGetExtensions() throws Exception {
-    Extension ext = (Extension)parserFactory.getExtensions("text/html").get(0);
+    Extension ext = (Extension) parserFactory.getExtensions("text/html").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
-    ext = (Extension) parserFactory.getExtensions("text/html; charset=ISO-8859-1").get(0);
+    ext = (Extension) parserFactory.getExtensions(
+        "text/html; charset=ISO-8859-1").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
-    ext = (Extension)parserFactory.getExtensions("foo/bar").get(0);
+    ext = (Extension) parserFactory.getExtensions("foo/bar").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
   }
-  
+
   /** Unit test to check <code>getParsers</code> method */
   public void testGetParsers() throws Exception {
-    Parser [] parsers = parserFactory.getParsers("text/html", "http://foo.com");
+    Parser[] parsers = parserFactory.getParsers("text/html", "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
 
     parsers = parserFactory.getParsers("text/html; charset=ISO-8859-1",
-                                       "http://foo.com");
+        "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
+
     parsers = parserFactory.getParsers("application/x-javascript",
-                                       "http://foo.com");
+        "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.js.JSParseFilter",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.js.JSParseFilter", parsers[0]
+        .getClass().getName());
+
     parsers = parserFactory.getParsers("text/plain", "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
+
     Parser parser1 = parserFactory.getParsers("text/plain", "http://foo.com")[0];
     Parser parser2 = parserFactory.getParsers("*", "http://foo.com")[0];
-   
+
     assertEquals("Different instances!", parser1.hashCode(), parser2.hashCode());
-    
-    //test and make sure that the rss parser is loaded even though its plugin.xml
-    //doesn't claim to support text/rss, only application/rss+xml
-    parsers = parserFactory.getParsers("text/rss","http://foo.com");
+
+    // test and make sure that the rss parser is loaded even though its
+    // plugin.xml
+    // doesn't claim to support text/rss, only application/rss+xml
+    parsers = parserFactory.getParsers("text/rss", "http://foo.com");
     assertNotNull(parsers);
-    assertEquals(1,parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
+    assertEquals(1, parsers.length);
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
   }
- 
+
 }
Index: src/test/org/apache/nutch/parse/TestParseData.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParseData.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestParseData.java	(working copy)
@@ -28,38 +28,37 @@
 /** Unit tests for ParseData. */
 
 public class TestParseData extends TestCase {
-    
+
   private Configuration conf = NutchConfiguration.create();
-  
-  public TestParseData(String name) { super(name); }
 
+  public TestParseData(String name) {
+    super(name);
+  }
+
   public void testParseData() throws Exception {
 
     String title = "The Foo Page";
 
-    Outlink[] outlinks = new Outlink[] {
-      new Outlink("http://foo.com/", "Foo"),
-      new Outlink("http://bar.com/", "Bar")
-    };
+    Outlink[] outlinks = new Outlink[] { new Outlink("http://foo.com/", "Foo"),
+        new Outlink("http://bar.com/", "Bar") };
 
     Metadata metaData = new Metadata();
     metaData.add("Language", "en/us");
     metaData.add("Charset", "UTF-8");
 
-    ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
-                        
+    ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks,
+        metaData);
+
     WritableTestUtils.testWritable(r, null);
   }
-	
+
   public void testMaxOutlinks() throws Exception {
     Outlink[] outlinks = new Outlink[128];
-    for (int i=0; i<outlinks.length; i++) {
+    for (int i = 0; i < outlinks.length; i++) {
       outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i);
     }
     ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS,
-                                       "Max Outlinks Title",
-                                       outlinks,
-                                       new Metadata());
+        "Max Outlinks Title", outlinks, new Metadata());
     ParseData data = (ParseData) WritableTestUtils.writeRead(original, null);
     assertEquals(outlinks.length, data.getOutlinks().length);
   }
Index: src/test/org/apache/nutch/util/TestSuffixStringMatcher.java
===================================================================
--- src/test/org/apache/nutch/util/TestSuffixStringMatcher.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestSuffixStringMatcher.java	(working copy)
@@ -21,101 +21,94 @@
 
 /** Unit tests for SuffixStringMatcher. */
 public class TestSuffixStringMatcher extends TestCase {
-  public TestSuffixStringMatcher(String name) { 
-    super(name); 
+  public TestSuffixStringMatcher(String name) {
+    super(name);
   }
 
-  private final static int NUM_TEST_ROUNDS= 20;
-  private final static int MAX_TEST_SUFFIXES= 100;
-  private final static int MAX_SUFFIX_LEN= 10;
-  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
-  private final static int MAX_INPUT_LEN= 20;
+  private final static int NUM_TEST_ROUNDS = 20;
+  private final static int MAX_TEST_SUFFIXES = 100;
+  private final static int MAX_SUFFIX_LEN = 10;
+  private final static int NUM_TEST_INPUTS_PER_ROUND = 100;
+  private final static int MAX_INPUT_LEN = 20;
 
-  private final static char[] alphabet= 
-    new char[] {
-      'a', 'b', 'c', 'd',
-//      'e', 'f', 'g', 'h', 'i', 'j',
-//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
-//      '5', '6', '7', '8', '9', '0'
-    };
+  private final static char[] alphabet = new char[] { 'a', 'b', 'c', 'd',
+  // 'e', 'f', 'g', 'h', 'i', 'j',
+  // 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+  // 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+  // '5', '6', '7', '8', '9', '0'
+  };
 
   private String makeRandString(int minLen, int maxLen) {
-    int len= minLen + (int) (Math.random() * (maxLen - minLen));
-    char[] chars= new char[len];
-    
-    for (int pos= 0; pos < len; pos++) {
-      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
+    int len = minLen + (int) (Math.random() * (maxLen - minLen));
+    char[] chars = new char[len];
+
+    for (int pos = 0; pos < len; pos++) {
+      chars[pos] = alphabet[(int) (Math.random() * alphabet.length)];
     }
-    
+
     return new String(chars);
   }
-  
+
   public void testSuffixMatcher() {
-    int numMatches= 0;
-    int numInputsTested= 0;
+    int numMatches = 0;
+    int numInputsTested = 0;
 
-    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {
+    for (int round = 0; round < NUM_TEST_ROUNDS; round++) {
 
       // build list of suffixes
-      int numSuffixes= (int) (Math.random() * MAX_TEST_SUFFIXES);
-      String[] suffixes= new String[numSuffixes];
-      for (int i= 0; i < numSuffixes; i++) {
-        suffixes[i]= makeRandString(0, MAX_SUFFIX_LEN);
+      int numSuffixes = (int) (Math.random() * MAX_TEST_SUFFIXES);
+      String[] suffixes = new String[numSuffixes];
+      for (int i = 0; i < numSuffixes; i++) {
+        suffixes[i] = makeRandString(0, MAX_SUFFIX_LEN);
       }
 
-      SuffixStringMatcher sufmatcher= new SuffixStringMatcher(suffixes);
+      SuffixStringMatcher sufmatcher = new SuffixStringMatcher(suffixes);
 
       // test random strings for suffix matches
-      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
-        String input= makeRandString(0, MAX_INPUT_LEN);
-        boolean matches= false;
-        int longestMatch= -1;
-        int shortestMatch= -1;
+      for (int i = 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
+        String input = makeRandString(0, MAX_INPUT_LEN);
+        boolean matches = false;
+        int longestMatch = -1;
+        int shortestMatch = -1;
 
-        for (int j= 0; j < suffixes.length; j++) {
+        for (int j = 0; j < suffixes.length; j++) {
 
-          if ((suffixes[j].length() > 0) 
-              && input.endsWith(suffixes[j])) {
+          if ((suffixes[j].length() > 0) && input.endsWith(suffixes[j])) {
 
-            matches= true;
-            int matchSize= suffixes[j].length();
+            matches = true;
+            int matchSize = suffixes[j].length();
 
-            if (matchSize > longestMatch) 
-              longestMatch= matchSize;
+            if (matchSize > longestMatch)
+              longestMatch = matchSize;
 
-            if ( (matchSize < shortestMatch)
-                 || (shortestMatch == -1) )
-              shortestMatch= matchSize;
+            if ((matchSize < shortestMatch) || (shortestMatch == -1))
+              shortestMatch = matchSize;
           }
 
         }
 
-        if (matches) 
+        if (matches)
           numMatches++;
 
         numInputsTested++;
 
-        assertTrue( "'" + input + "' should " + (matches ? "" : "not ") 
-                    + "match!",
-                    matches == sufmatcher.matches(input) );
+        assertTrue("'" + input + "' should " + (matches ? "" : "not ")
+            + "match!", matches == sufmatcher.matches(input));
         if (matches) {
-          assertTrue( shortestMatch 
-                      == sufmatcher.shortestMatch(input).length());
-          assertTrue( input.substring(input.length() - shortestMatch).equals(
-                        sufmatcher.shortestMatch(input)) );
+          assertTrue(shortestMatch == sufmatcher.shortestMatch(input).length());
+          assertTrue(input.substring(input.length() - shortestMatch).equals(
+              sufmatcher.shortestMatch(input)));
 
-          assertTrue( longestMatch 
-                      == sufmatcher.longestMatch(input).length());
-          assertTrue( input.substring(input.length() - longestMatch).equals(
-                        sufmatcher.longestMatch(input)) );
+          assertTrue(longestMatch == sufmatcher.longestMatch(input).length());
+          assertTrue(input.substring(input.length() - longestMatch).equals(
+              sufmatcher.longestMatch(input)));
 
         }
       }
     }
 
-    System.out.println("got " + numMatches + " matches out of " 
-                       + numInputsTested + " tests");
+    System.out.println("got " + numMatches + " matches out of "
+        + numInputsTested + " tests");
   }
 
 }
Index: src/test/org/apache/nutch/util/TestURLUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestURLUtil.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestURLUtil.java	(working copy)
@@ -22,17 +22,14 @@
 import junit.framework.TestCase;
 
 /** Test class for URLUtil */
-public class TestURLUtil
-  extends TestCase {
+public class TestURLUtil extends TestCase {
 
   @Override
-  protected void setUp()
-    throws Exception {
+  protected void setUp() throws Exception {
     super.setUp();
   }
 
-  public void testGetDomainName()
-    throws Exception {
+  public void testGetDomainName() throws Exception {
 
     URL url = null;
 
@@ -81,8 +78,7 @@
 
   }
 
-  public void testGetDomainSuffix()
-    throws Exception {
+  public void testGetDomainSuffix() throws Exception {
     URL url = null;
 
     url = new URL("http://lucene.apache.org/nutch");
@@ -133,8 +129,7 @@
 
   }
 
-  public void testGetHostSegments()
-    throws Exception {
+  public void testGetHostSegments() throws Exception {
     URL url;
     String[] segments;
 
@@ -165,9 +160,8 @@
 
   }
 
-  public void testChooseRepr()
-    throws Exception {
-    
+  public void testChooseRepr() throws Exception {
+
     String aDotCom = "http://www.a.com";
     String bDotCom = "http://www.b.com";
     String aSubDotCom = "http://www.news.a.com";
@@ -175,40 +169,41 @@
     String aPath = "http://www.a.com/xyz/index.html";
     String aPath2 = "http://www.a.com/abc/page.html";
     String aPath3 = "http://www.news.a.com/abc/page.html";
-    
+
     // 1) different domain then keep dest, temp or perm
     // a.com -> b.com*
     assertEquals(bDotCom, URLUtil.chooseRepr(aDotCom, bDotCom, true));
     assertEquals(bDotCom, URLUtil.chooseRepr(aDotCom, bDotCom, false));
-    
+
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aQStr, false));
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aPath, false));
-    
-    //3) permanent and not root and dest root, keep dest
-    //a.com/xyz/index.html -> a.com*
+
+    // 3) permanent and not root and dest root, keep dest
+    // a.com/xyz/index.html -> a.com*
     assertEquals(aDotCom, URLUtil.chooseRepr(aPath, aDotCom, false));
-    
-    //4) permanent and neither root keep dest
+
+    // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
     assertEquals(aPath2, URLUtil.chooseRepr(aPath, aPath2, false));
-    
-    //5) temp and root and dest not root keep src
-    //*a.com -> a.com/xyz/index.html
+
+    // 5) temp and root and dest not root keep src
+    // *a.com -> a.com/xyz/index.html
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aPath, true));
-    
-    //6) temp and not root and dest root keep dest
+
+    // 6) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
     assertEquals(aDotCom, URLUtil.chooseRepr(aPath, aDotCom, true));
 
-    //7) temp and neither root, keep shortest, if hosts equal by path else by hosts
-    //  a.com/xyz/index.html -> a.com/abc/page.html*
+    // 7) temp and neither root, keep shortest, if hosts equal by path else by
+    // hosts
+    // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
     assertEquals(aPath2, URLUtil.chooseRepr(aPath, aPath2, true));
     assertEquals(aPath, URLUtil.chooseRepr(aPath, aPath3, true));
 
-    //8) temp and both root keep shortest sub domain
+    // 8) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aSubDotCom, true));
   }
@@ -216,30 +211,18 @@
   // from RFC3986 section 5.4.1
   private static String baseString = "http://a/b/c/d;p?q";
   private static String[][] targets = new String[][] {
-    // unknown protocol {"g:h"           ,  "g:h"},
-    {"g"             ,  "http://a/b/c/g"},
-    { "./g"           ,  "http://a/b/c/g"},
-    { "g/"            ,  "http://a/b/c/g/"},
-    { "/g"            ,  "http://a/g"},
-    { "//g"           ,  "http://g"},
-    { "?y"            ,  "http://a/b/c/d;p?y"},
-    { "g?y"           ,  "http://a/b/c/g?y"},
-    { "#s"            ,  "http://a/b/c/d;p?q#s"},
-    { "g#s"           ,  "http://a/b/c/g#s"},
-    { "g?y#s"         ,  "http://a/b/c/g?y#s"},
-    { ";x"            ,  "http://a/b/c/;x"},
-    { "g;x"           ,  "http://a/b/c/g;x"},
-    { "g;x?y#s"       ,  "http://a/b/c/g;x?y#s"},
-    { ""              ,  "http://a/b/c/d;p?q"},
-    { "."             ,  "http://a/b/c/"},
-    { "./"            ,  "http://a/b/c/"},
-    { ".."            ,  "http://a/b/"},
-    { "../"           ,  "http://a/b/"},
-    { "../g"          ,  "http://a/b/g"},
-    { "../.."         ,  "http://a/"},
-    { "../../"        ,  "http://a/"},
-    { "../../g"       ,  "http://a/g"}
-  };
+      // unknown protocol {"g:h" , "g:h"},
+      { "g", "http://a/b/c/g" }, { "./g", "http://a/b/c/g" },
+      { "g/", "http://a/b/c/g/" }, { "/g", "http://a/g" },
+      { "//g", "http://g" }, { "?y", "http://a/b/c/d;p?y" },
+      { "g?y", "http://a/b/c/g?y" }, { "#s", "http://a/b/c/d;p?q#s" },
+      { "g#s", "http://a/b/c/g#s" }, { "g?y#s", "http://a/b/c/g?y#s" },
+      { ";x", "http://a/b/c/;x" }, { "g;x", "http://a/b/c/g;x" },
+      { "g;x?y#s", "http://a/b/c/g;x?y#s" }, { "", "http://a/b/c/d;p?q" },
+      { ".", "http://a/b/c/" }, { "./", "http://a/b/c/" },
+      { "..", "http://a/b/" }, { "../", "http://a/b/" },
+      { "../g", "http://a/b/g" }, { "../..", "http://a/" },
+      { "../../", "http://a/" }, { "../../g", "http://a/g" } };
 
   public void testResolveURL() throws Exception {
     // test NUTCH-436
@@ -250,7 +233,8 @@
     // test NUTCH-566
     URL u566 = new URL("http://www.fleurie.org/entreprise.asp");
     abs = URLUtil.resolveURL(u566, "?id_entrep=111");
-    assertEquals("http://www.fleurie.org/entreprise.asp?id_entrep=111", abs.toString());
+    assertEquals("http://www.fleurie.org/entreprise.asp?id_entrep=111",
+        abs.toString());
     URL base = new URL(baseString);
     assertEquals("base url parsing", baseString, base.toString());
     for (int i = 0; i < targets.length; i++) {
Index: src/test/org/apache/nutch/util/TestStringUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestStringUtil.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestStringUtil.java	(working copy)
@@ -21,41 +21,41 @@
 
 /** Unit tests for StringUtil methods. */
 public class TestStringUtil extends TestCase {
-  public TestStringUtil(String name) { 
-    super(name); 
+  public TestStringUtil(String name) {
+    super(name);
   }
 
   public void testRightPad() {
-    String s= "my string";
+    String s = "my string";
 
-    String ps= StringUtil.rightPad(s, 0);
+    String ps = StringUtil.rightPad(s, 0);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.rightPad(s, 9);
+    ps = StringUtil.rightPad(s, 9);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.rightPad(s, 10);
-    assertTrue( (s+" ").equals(ps) );
+    ps = StringUtil.rightPad(s, 10);
+    assertTrue((s + " ").equals(ps));
 
-    ps= StringUtil.rightPad(s, 15);
-    assertTrue( (s+"      ").equals(ps) );
+    ps = StringUtil.rightPad(s, 15);
+    assertTrue((s + "      ").equals(ps));
 
   }
 
   public void testLeftPad() {
-    String s= "my string";
+    String s = "my string";
 
-    String ps= StringUtil.leftPad(s, 0);
+    String ps = StringUtil.leftPad(s, 0);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.leftPad(s, 9);
+    ps = StringUtil.leftPad(s, 9);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.leftPad(s, 10);
-    assertTrue( (" "+s).equals(ps) );
+    ps = StringUtil.leftPad(s, 10);
+    assertTrue((" " + s).equals(ps));
 
-    ps= StringUtil.leftPad(s, 15);
-    assertTrue( ("      "+s).equals(ps) );
+    ps = StringUtil.leftPad(s, 15);
+    assertTrue(("      " + s).equals(ps));
 
   }
 
Index: src/test/org/apache/nutch/util/TestPrefixStringMatcher.java
===================================================================
--- src/test/org/apache/nutch/util/TestPrefixStringMatcher.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestPrefixStringMatcher.java	(working copy)
@@ -21,101 +21,94 @@
 
 /** Unit tests for PrefixStringMatcher. */
 public class TestPrefixStringMatcher extends TestCase {
-  public TestPrefixStringMatcher(String name) { 
-    super(name); 
+  public TestPrefixStringMatcher(String name) {
+    super(name);
   }
 
-  private final static int NUM_TEST_ROUNDS= 20;
-  private final static int MAX_TEST_PREFIXES= 100;
-  private final static int MAX_PREFIX_LEN= 10;
-  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
-  private final static int MAX_INPUT_LEN= 20;
+  private final static int NUM_TEST_ROUNDS = 20;
+  private final static int MAX_TEST_PREFIXES = 100;
+  private final static int MAX_PREFIX_LEN = 10;
+  private final static int NUM_TEST_INPUTS_PER_ROUND = 100;
+  private final static int MAX_INPUT_LEN = 20;
 
-  private final static char[] alphabet= 
-    new char[] {
-      'a', 'b', 'c', 'd',
-//      'e', 'f', 'g', 'h', 'i', 'j',
-//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
-//      '5', '6', '7', '8', '9', '0'
-    };
+  private final static char[] alphabet = new char[] { 'a', 'b', 'c', 'd',
+  // 'e', 'f', 'g', 'h', 'i', 'j',
+  // 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+  // 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+  // '5', '6', '7', '8', '9', '0'
+  };
 
   private String makeRandString(int minLen, int maxLen) {
-    int len= minLen + (int) (Math.random() * (maxLen - minLen));
-    char[] chars= new char[len];
-    
-    for (int pos= 0; pos < len; pos++) {
-      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
+    int len = minLen + (int) (Math.random() * (maxLen - minLen));
+    char[] chars = new char[len];
+
+    for (int pos = 0; pos < len; pos++) {
+      chars[pos] = alphabet[(int) (Math.random() * alphabet.length)];
     }
-    
+
     return new String(chars);
   }
-  
+
   public void testPrefixMatcher() {
-    int numMatches= 0;
-    int numInputsTested= 0;
+    int numMatches = 0;
+    int numInputsTested = 0;
 
-    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {
+    for (int round = 0; round < NUM_TEST_ROUNDS; round++) {
 
       // build list of prefixes
-      int numPrefixes= (int) (Math.random() * MAX_TEST_PREFIXES);
-      String[] prefixes= new String[numPrefixes];
-      for (int i= 0; i < numPrefixes; i++) {
-        prefixes[i]= makeRandString(0, MAX_PREFIX_LEN);
+      int numPrefixes = (int) (Math.random() * MAX_TEST_PREFIXES);
+      String[] prefixes = new String[numPrefixes];
+      for (int i = 0; i < numPrefixes; i++) {
+        prefixes[i] = makeRandString(0, MAX_PREFIX_LEN);
       }
 
-      PrefixStringMatcher prematcher= new PrefixStringMatcher(prefixes);
+      PrefixStringMatcher prematcher = new PrefixStringMatcher(prefixes);
 
       // test random strings for prefix matches
-      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
-        String input= makeRandString(0, MAX_INPUT_LEN);
-        boolean matches= false;
-        int longestMatch= -1;
-        int shortestMatch= -1;
+      for (int i = 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
+        String input = makeRandString(0, MAX_INPUT_LEN);
+        boolean matches = false;
+        int longestMatch = -1;
+        int shortestMatch = -1;
 
-        for (int j= 0; j < prefixes.length; j++) {
+        for (int j = 0; j < prefixes.length; j++) {
 
-          if ((prefixes[j].length() > 0) 
-              && input.startsWith(prefixes[j])) {
+          if ((prefixes[j].length() > 0) && input.startsWith(prefixes[j])) {
 
-            matches= true;
-            int matchSize= prefixes[j].length();
+            matches = true;
+            int matchSize = prefixes[j].length();
 
-            if (matchSize > longestMatch) 
-              longestMatch= matchSize;
+            if (matchSize > longestMatch)
+              longestMatch = matchSize;
 
-            if ( (matchSize < shortestMatch)
-                 || (shortestMatch == -1) )
-              shortestMatch= matchSize;
+            if ((matchSize < shortestMatch) || (shortestMatch == -1))
+              shortestMatch = matchSize;
           }
 
         }
 
-        if (matches) 
+        if (matches)
           numMatches++;
 
         numInputsTested++;
 
-        assertTrue( "'" + input + "' should " + (matches ? "" : "not ") 
-                    + "match!",
-                    matches == prematcher.matches(input) );
+        assertTrue("'" + input + "' should " + (matches ? "" : "not ")
+            + "match!", matches == prematcher.matches(input));
         if (matches) {
-          assertTrue( shortestMatch 
-                      == prematcher.shortestMatch(input).length());
-          assertTrue( input.substring(0, shortestMatch).equals(
-                        prematcher.shortestMatch(input)) );
+          assertTrue(shortestMatch == prematcher.shortestMatch(input).length());
+          assertTrue(input.substring(0, shortestMatch).equals(
+              prematcher.shortestMatch(input)));
 
-          assertTrue( longestMatch 
-                      == prematcher.longestMatch(input).length());
-          assertTrue( input.substring(0, longestMatch).equals(
-                        prematcher.longestMatch(input)) );
+          assertTrue(longestMatch == prematcher.longestMatch(input).length());
+          assertTrue(input.substring(0, longestMatch).equals(
+              prematcher.longestMatch(input)));
 
         }
       }
     }
 
-    System.out.println("got " + numMatches + " matches out of " 
-                       + numInputsTested + " tests");
+    System.out.println("got " + numMatches + " matches out of "
+        + numInputsTested + " tests");
   }
 
 }
Index: src/test/org/apache/nutch/util/TestGZIPUtils.java
===================================================================
--- src/test/org/apache/nutch/util/TestGZIPUtils.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestGZIPUtils.java	(working copy)
@@ -23,223 +23,216 @@
 
 /** Unit tests for GZIPUtils methods. */
 public class TestGZIPUtils extends TestCase {
-  public TestGZIPUtils(String name) { 
-    super(name); 
+  public TestGZIPUtils(String name) {
+    super(name);
   }
 
   /* a short, highly compressable, string */
-  String SHORT_TEST_STRING= 
-    "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
+  String SHORT_TEST_STRING = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
 
   /* a short, highly compressable, string */
-  String LONGER_TEST_STRING= 
-    SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING;
+  String LONGER_TEST_STRING = SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING;
 
   /* a snapshot of the nutch webpage */
-  String WEBPAGE= 
-  "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
-  + "<html>\n"
-  + "<head>\n"
-  + "  <meta http-equiv=\"content-type\"\n"
-  + " content=\"text/html; charset=ISO-8859-1\">\n"
-  + "  <title>Nutch</title>\n"
-  + "</head>\n"
-  + "<body>\n"
-  + "<h1\n"
-  + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
-  + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
-  + "<small>an open source web-search engine</small></h1>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
-  + "<table\n"
-  + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
-  + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
-  + "  <tbody>\n"
-  + "    <tr>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"tutorial.html\">Tutorial</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"api/index.html\">Javadoc</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"policies.html\">Policies</a><br>\n"
-  + "      </td>\n"
-  + "    </tr>\n"
-  + "  </tbody>\n"
-  + "</table>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
-  + "<h2>Introduction</h2>\n"
-  + "Nutch is a nascent effort to implement an open-source web search\n"
-  + "engine. Web search is a basic requirement for internet navigation, yet\n"
-  + "the number of web search engines is decreasing. Today's oligopoly could\n"
-  + "soon be a monopoly, with a single company controlling nearly all web\n"
-  + "search for its commercial gain. &nbsp;That would not be good for the\n"
-  + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
-  + "cost-effectively deploy a world-class web search engine.<br>\n"
-  + "<br>\n"
-  + "To succeed, the Nutch software must be able to:<br>\n"
-  + "<ul>\n"
-  + "  <li> crawl several billion pages per month</li>\n"
-  + "  <li>maintain an index of these pages</li>\n"
-  + "  <li>search that index up to 1000 times per second</li>\n"
-  + "  <li>provide very high quality search results</li>\n"
-  + "  <li>operate at minimal cost</li>\n"
-  + "</ul>\n"
-  + "<h2>Status</h2>\n"
-  + "Currently we're just a handful of developers working part-time to put\n"
-  + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
-  + "persistent data is written in well-documented formats so that modules\n"
-  + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
-  + "project progresses.<br>\n"
-  + "<br>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
-  + " href=\"http://sourceforge.net\"> </a>\n"
-  + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
-  + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
-  + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
-  + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
-  + "</body>\n"
-  + "</html>\n";
+  String WEBPAGE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
+      + "<html>\n"
+      + "<head>\n"
+      + "  <meta http-equiv=\"content-type\"\n"
+      + " content=\"text/html; charset=ISO-8859-1\">\n"
+      + "  <title>Nutch</title>\n"
+      + "</head>\n"
+      + "<body>\n"
+      + "<h1\n"
+      + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
+      + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
+      + "<small>an open source web-search engine</small></h1>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+      + "<table\n"
+      + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
+      + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
+      + "  <tbody>\n"
+      + "    <tr>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"tutorial.html\">Tutorial</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"api/index.html\">Javadoc</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"policies.html\">Policies</a><br>\n"
+      + "      </td>\n"
+      + "    </tr>\n"
+      + "  </tbody>\n"
+      + "</table>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+      + "<h2>Introduction</h2>\n"
+      + "Nutch is a nascent effort to implement an open-source web search\n"
+      + "engine. Web search is a basic requirement for internet navigation, yet\n"
+      + "the number of web search engines is decreasing. Today's oligopoly could\n"
+      + "soon be a monopoly, with a single company controlling nearly all web\n"
+      + "search for its commercial gain. &nbsp;That would not be good for the\n"
+      + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
+      + "cost-effectively deploy a world-class web search engine.<br>\n"
+      + "<br>\n"
+      + "To succeed, the Nutch software must be able to:<br>\n"
+      + "<ul>\n"
+      + "  <li> crawl several billion pages per month</li>\n"
+      + "  <li>maintain an index of these pages</li>\n"
+      + "  <li>search that index up to 1000 times per second</li>\n"
+      + "  <li>provide very high quality search results</li>\n"
+      + "  <li>operate at minimal cost</li>\n"
+      + "</ul>\n"
+      + "<h2>Status</h2>\n"
+      + "Currently we're just a handful of developers working part-time to put\n"
+      + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
+      + "persistent data is written in well-documented formats so that modules\n"
+      + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
+      + "project progresses.<br>\n"
+      + "<br>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
+      + " href=\"http://sourceforge.net\"> </a>\n"
+      + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
+      + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
+      + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
+      + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
+      + "</body>\n"
+      + "</html>\n";
 
   // tests
 
   public void testZipUnzip() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testZipUnzip(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testZipUnzip(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testZipUnzip(testBytes);
   }
 
   public void testZipUnzipBestEffort() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testZipUnzipBestEffort(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testZipUnzipBestEffort(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testZipUnzipBestEffort(testBytes);
   }
-  
+
   public void testTruncation() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testTruncation(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testTruncation(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testTruncation(testBytes);
   }
 
   public void testLimit() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testLimit(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testLimit(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testLimit(testBytes);
   }
 
   // helpers
 
   public void testZipUnzip(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-	       compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    byte[] uncompressedBytes= null;
+    byte[] uncompressedBytes = null;
     try {
-      uncompressedBytes= GZIPUtils.unzip(compressedBytes);
+      uncompressedBytes = GZIPUtils.unzip(compressedBytes);
     } catch (IOException e) {
       e.printStackTrace();
-      assertTrue("caught exception '" + e + "' during unzip()",
-		 false);
+      assertTrue("caught exception '" + e + "' during unzip()", false);
     }
-    assertTrue("uncompressedBytes is wrong size", 
-	       uncompressedBytes.length == origBytes.length);
+    assertTrue("uncompressedBytes is wrong size",
+        uncompressedBytes.length == origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) 
+    for (int i = 0; i < origBytes.length; i++)
       if (origBytes[i] != uncompressedBytes[i])
-	assertTrue("uncompressedBytes does not match origBytes", false);
+        assertTrue("uncompressedBytes does not match origBytes", false);
   }
 
   public void testZipUnzipBestEffort(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-	       compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes);
-    assertTrue("uncompressedBytes is wrong size", 
-	       uncompressedBytes.length == origBytes.length);
+    byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes);
+    assertTrue("uncompressedBytes is wrong size",
+        uncompressedBytes.length == origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) 
+    for (int i = 0; i < origBytes.length; i++)
       if (origBytes[i] != uncompressedBytes[i])
-	assertTrue("uncompressedBytes does not match origBytes", false);
+        assertTrue("uncompressedBytes does not match origBytes", false);
   }
 
   public void testTruncation(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     System.out.println("original data has len " + origBytes.length);
-    System.out.println("compressed data has len " 
-		       + compressedBytes.length);
+    System.out.println("compressed data has len " + compressedBytes.length);
 
-    for (int i= compressedBytes.length; i >= 0; i--) {
+    for (int i = compressedBytes.length; i >= 0; i--) {
 
-      byte[] truncCompressed= new byte[i];
+      byte[] truncCompressed = new byte[i];
 
-      for (int j= 0; j < i; j++)
-	truncCompressed[j]= compressedBytes[j];
+      for (int j = 0; j < i; j++)
+        truncCompressed[j] = compressedBytes[j];
 
-      byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed);
+      byte[] trunc = GZIPUtils.unzipBestEffort(truncCompressed);
 
       if (trunc == null) {
-	System.out.println("truncated to len "
-			   + i + ", trunc is null");
+        System.out.println("truncated to len " + i + ", trunc is null");
       } else {
-	System.out.println("truncated to len "
-			   + i + ", trunc.length=  " 
-			   + trunc.length);
+        System.out.println("truncated to len " + i + ", trunc.length=  "
+            + trunc.length);
 
-	for (int j= 0; j < trunc.length; j++)
-	  if (trunc[j] != origBytes[j]) 
-	    assertTrue("truncated/uncompressed array differs at pos "
-		       + j + " (compressed data had been truncated to len "
-		       + i + ")", false);
+        for (int j = 0; j < trunc.length; j++)
+          if (trunc[j] != origBytes[j])
+            assertTrue("truncated/uncompressed array differs at pos " + j
+                + " (compressed data had been truncated to len " + i + ")",
+                false);
       }
     }
   }
 
   public void testLimit(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-               compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) {
+    for (int i = 0; i < origBytes.length; i++) {
 
-      byte[] uncompressedBytes= 
-        GZIPUtils.unzipBestEffort(compressedBytes, i);
+      byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes, i);
 
-      assertTrue("uncompressedBytes is wrong size", 
-                 uncompressedBytes.length == i);
+      assertTrue("uncompressedBytes is wrong size",
+          uncompressedBytes.length == i);
 
-      for (int j= 0; j < i; j++) 
+      for (int j = 0; j < i; j++)
         if (origBytes[j] != uncompressedBytes[j])
           assertTrue("uncompressedBytes does not match origBytes", false);
     }
Index: src/test/org/apache/nutch/util/WritableTestUtils.java
===================================================================
--- src/test/org/apache/nutch/util/WritableTestUtils.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/WritableTestUtils.java	(working copy)
@@ -35,23 +35,22 @@
     TestCase.assertEquals(before, writeRead(before, conf));
   }
 
-  
   /** Utility method for testing writables. */
   public static Writable writeRead(Writable before, Configuration conf)
-    throws Exception {
-    
+      throws Exception {
+
     DataOutputBuffer dob = new DataOutputBuffer();
     before.write(dob);
-    
+
     DataInputBuffer dib = new DataInputBuffer();
     dib.reset(dob.getData(), dob.getLength());
-    
-    Writable after = (Writable)before.getClass().newInstance();
+
+    Writable after = (Writable) before.getClass().newInstance();
     if (conf != null) {
-      ((Configurable)after).setConf(conf);
+      ((Configurable) after).setConf(conf);
     }
     after.readFields(dib);
     return after;
   }
-  
+
 }
Index: src/test/org/apache/nutch/util/TestNodeWalker.java
===================================================================
--- src/test/org/apache/nutch/util/TestNodeWalker.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestNodeWalker.java	(working copy)
@@ -24,49 +24,45 @@
 import org.w3c.dom.Node;
 import org.xml.sax.InputSource;
 
-
-
-
 /** Unit tests for NodeWalker methods. */
 public class TestNodeWalker extends TestCase {
-  public TestNodeWalker(String name) { 
-    super(name); 
+  public TestNodeWalker(String name) {
+    super(name);
   }
 
   /* a snapshot of the nutch webpage */
-  private final static String WEBPAGE= 
-  "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
-  + "<body>"
-  + "<ul>"
-  + "<li>crawl several billion pages per month</li>"
-  + "<li>maintain an index of these pages</li>"
-  + "<li>search that index up to 1000 times per second</li>"
-  + "<li>provide very high quality search results</li>"
-  + "<li>operate at minimal cost</li>"
-  + "</ul>"
-  + "</body>"
-  + "</html>";
+  private final static String WEBPAGE = "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
+      + "<body>"
+      + "<ul>"
+      + "<li>crawl several billion pages per month</li>"
+      + "<li>maintain an index of these pages</li>"
+      + "<li>search that index up to 1000 times per second</li>"
+      + "<li>provide very high quality search results</li>"
+      + "<li>operate at minimal cost</li>" + "</ul>" + "</body>" + "</html>";
 
   private final static String[] ULCONTENT = new String[4];
-  
-  protected void setUp() throws Exception{
-    ULCONTENT[0]="crawl several billion pages per month" ;
-    ULCONTENT[1]="maintain an index of these pages" ;
-    ULCONTENT[2]="search that index up to 1000 times per second"  ;
-    ULCONTENT[3]="operate at minimal cost" ;
+
+  protected void setUp() throws Exception {
+    ULCONTENT[0] = "crawl several billion pages per month";
+    ULCONTENT[1] = "maintain an index of these pages";
+    ULCONTENT[2] = "search that index up to 1000 times per second";
+    ULCONTENT[3] = "operate at minimal cost";
   }
 
   public void testSkipChildren() {
-    DOMParser parser= new DOMParser();
-    
+    DOMParser parser = new DOMParser();
+
     try {
       parser.setFeature("http://xml.org/sax/features/validation", false);
-      parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
-      parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
+      parser.setFeature(
+          "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+          false);
+      parser
+          .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
     } catch (Exception e) {
       e.printStackTrace();
     }
-     
+
     StringBuffer sb = new StringBuffer();
     NodeWalker walker = new NodeWalker(parser.getDocument());
     while (walker.hasNext()) {
@@ -78,30 +74,33 @@
         sb.append(text);
       }
     }
-   assertTrue("UL Content can NOT be found in the node", findSomeUlContent(sb.toString()));
-     
-   StringBuffer sbSkip = new StringBuffer();
-   NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
-   while (walkerSkip.hasNext()) {
-     Node currentNode = walkerSkip.nextNode();
-     String nodeName = currentNode.getNodeName();
-     short nodeType = currentNode.getNodeType();
-     if ("ul".equalsIgnoreCase(nodeName)) {
-       walkerSkip.skipChildren();
-     }
-     if (nodeType == Node.TEXT_NODE) {
-       String text = currentNode.getNodeValue();
-       text = text.replaceAll("\\s+", " ");
-       sbSkip.append(text);
-     }
-   }
-   assertFalse("UL Content can be found in the node", findSomeUlContent(sbSkip.toString()));
+    assertTrue("UL Content can NOT be found in the node",
+        findSomeUlContent(sb.toString()));
+
+    StringBuffer sbSkip = new StringBuffer();
+    NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
+    while (walkerSkip.hasNext()) {
+      Node currentNode = walkerSkip.nextNode();
+      String nodeName = currentNode.getNodeName();
+      short nodeType = currentNode.getNodeType();
+      if ("ul".equalsIgnoreCase(nodeName)) {
+        walkerSkip.skipChildren();
+      }
+      if (nodeType == Node.TEXT_NODE) {
+        String text = currentNode.getNodeValue();
+        text = text.replaceAll("\\s+", " ");
+        sbSkip.append(text);
+      }
+    }
+    assertFalse("UL Content can be found in the node",
+        findSomeUlContent(sbSkip.toString()));
   }
-  
+
   public boolean findSomeUlContent(String str) {
-    for(int i=0; i<ULCONTENT.length ; i++){
-      if(str.contains(ULCONTENT[i])) return true;
-    }    
+    for (int i = 0; i < ULCONTENT.length; i++) {
+      if (str.contains(ULCONTENT[i]))
+        return true;
+    }
     return false;
   }
 }
Index: src/test/org/apache/nutch/indexer/TestIndexingFilters.java
===================================================================
--- src/test/org/apache/nutch/indexer/TestIndexingFilters.java	(revision 1188252)
+++ src/test/org/apache/nutch/indexer/TestIndexingFilters.java	(working copy)
@@ -33,6 +33,7 @@
 
   /**
    * Test behaviour when defined filter does not exist.
+   * 
    * @throws IndexingException
    */
   public void testNonExistingIndexingFilter() throws IndexingException {
Index: src/test/org/apache/nutch/plugin/TestPluginSystem.java
===================================================================
--- src/test/org/apache/nutch/plugin/TestPluginSystem.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/TestPluginSystem.java	(working copy)
@@ -39,262 +39,256 @@
  * @author joa23
  */
 public class TestPluginSystem extends TestCase {
-    private int fPluginCount;
+  private int fPluginCount;
 
-    private LinkedList fFolders = new LinkedList();
-    private Configuration conf ;
-    private PluginRepository repository;
+  private LinkedList fFolders = new LinkedList();
+  private Configuration conf;
+  private PluginRepository repository;
 
-    protected void setUp() throws Exception {
-        this.conf = NutchConfiguration.create();
-        conf.set("plugin.includes", ".*");
-//        String string = this.conf.get("plugin.includes", "");
-//        conf.set("plugin.includes", string + "|Dummy*");
-        fPluginCount = 5;
-        createDummyPlugins(fPluginCount);
-        this.repository = PluginRepository.get(conf);
-    }
+  protected void setUp() throws Exception {
+    this.conf = NutchConfiguration.create();
+    conf.set("plugin.includes", ".*");
+    // String string = this.conf.get("plugin.includes", "");
+    // conf.set("plugin.includes", string + "|Dummy*");
+    fPluginCount = 5;
+    createDummyPlugins(fPluginCount);
+    this.repository = PluginRepository.get(conf);
+  }
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see junit.framework.TestCase#tearDown()
-     */
-    protected void tearDown() throws Exception {
-        for (int i = 0; i < fFolders.size(); i++) {
-            File folder = (File) fFolders.get(i);
-            delete(folder);
-            folder.delete();
-        }
-
+  /*
+   * (non-Javadoc)
+   * 
+   * @see junit.framework.TestCase#tearDown()
+   */
+  protected void tearDown() throws Exception {
+    for (int i = 0; i < fFolders.size(); i++) {
+      File folder = (File) fFolders.get(i);
+      delete(folder);
+      folder.delete();
     }
 
-    /**
+  }
+
+  /**
      */
-    public void testPluginConfiguration() {
-        String string = getPluginFolder();
-        File file = new File(string);
-        if (!file.exists()) {
-            file.mkdir();
-        }
-        assertTrue(file.exists());
+  public void testPluginConfiguration() {
+    String string = getPluginFolder();
+    File file = new File(string);
+    if (!file.exists()) {
+      file.mkdir();
     }
+    assertTrue(file.exists());
+  }
 
-    /**
+  /**
      */
-    public void testLoadPlugins() {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        int k = descriptors.length;
-        assertTrue(fPluginCount <= k);
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
-                continue;
-            }
-            assertEquals(1, descriptor.getExportedLibUrls().length);
-            assertEquals(1, descriptor.getNotExportedLibUrls().length);
-        }
+  public void testLoadPlugins() {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    int k = descriptors.length;
+    assertTrue(fPluginCount <= k);
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+        continue;
+      }
+      assertEquals(1, descriptor.getExportedLibUrls().length);
+      assertEquals(1, descriptor.getNotExportedLibUrls().length);
     }
+  }
 
-    public void testRepositoryCache() {
-      Configuration config = NutchConfiguration.create();
-      PluginRepository repo = PluginRepository.get(config);
-      JobConf job = new NutchJob(config);
-      PluginRepository repo1 = PluginRepository.get(job);
-      assertTrue(repo == repo1);
-      // now construct a config without UUID
-      config = new Configuration();
-      config.addResource("nutch-default.xml");
-      config.addResource("nutch-site.xml");
-      repo = PluginRepository.get(config);
-      job = new NutchJob(config);
-      repo1 = PluginRepository.get(job);
-      assertTrue(repo1 != repo);
-    }
+  public void testRepositoryCache() {
+    Configuration config = NutchConfiguration.create();
+    PluginRepository repo = PluginRepository.get(config);
+    JobConf job = new NutchJob(config);
+    PluginRepository repo1 = PluginRepository.get(job);
+    assertTrue(repo == repo1);
+    // now construct a config without UUID
+    config = new Configuration();
+    config.addResource("nutch-default.xml");
+    config.addResource("nutch-site.xml");
+    repo = PluginRepository.get(config);
+    job = new NutchJob(config);
+    repo1 = PluginRepository.get(job);
+    assertTrue(repo1 != repo);
+  }
 
-    /**
+  /**
      *  
      */
-    public void testGetExtensionAndAttributes() {
-        String xpId = " sdsdsd";
-        ExtensionPoint extensionPoint =repository
-                .getExtensionPoint(xpId);
-        assertEquals(extensionPoint, null);
-        Extension[] extension1 = repository
-                .getExtensionPoint(getGetExtensionId()).getExtensions();
-        assertEquals(extension1.length, fPluginCount);
-        for (int i = 0; i < extension1.length; i++) {
-            Extension extension2 = extension1[i];
-            String string = extension2.getAttribute(getGetConfigElementName());
-            assertEquals(string, getParameterValue());
-        }
+  public void testGetExtensionAndAttributes() {
+    String xpId = " sdsdsd";
+    ExtensionPoint extensionPoint = repository.getExtensionPoint(xpId);
+    assertEquals(extensionPoint, null);
+    Extension[] extension1 = repository.getExtensionPoint(getGetExtensionId())
+        .getExtensions();
+    assertEquals(extension1.length, fPluginCount);
+    for (int i = 0; i < extension1.length; i++) {
+      Extension extension2 = extension1[i];
+      String string = extension2.getAttribute(getGetConfigElementName());
+      assertEquals(string, getParameterValue());
     }
+  }
 
-    /**
-     * @throws PluginRuntimeException
-     */
-    public void testGetExtensionInstances() throws PluginRuntimeException {
-        Extension[] extensions = repository
-                .getExtensionPoint(getGetExtensionId()).getExtensions();
-        assertEquals(extensions.length, fPluginCount);
-        for (int i = 0; i < extensions.length; i++) {
-            Extension extension = extensions[i];
-            Object object = extension.getExtensionInstance();
-            if (!(object instanceof HelloWorldExtension))
-                fail(" object is not a instance of HelloWorldExtension");
-            ((ITestExtension) object).testGetExtension("Bla ");
-            String string = ((ITestExtension) object).testGetExtension("Hello");
-            assertEquals("Hello World", string);
-        }
+  /**
+   * @throws PluginRuntimeException
+   */
+  public void testGetExtensionInstances() throws PluginRuntimeException {
+    Extension[] extensions = repository.getExtensionPoint(getGetExtensionId())
+        .getExtensions();
+    assertEquals(extensions.length, fPluginCount);
+    for (int i = 0; i < extensions.length; i++) {
+      Extension extension = extensions[i];
+      Object object = extension.getExtensionInstance();
+      if (!(object instanceof HelloWorldExtension))
+        fail(" object is not an instance of HelloWorldExtension");
+      ((ITestExtension) object).testGetExtension("Bla ");
+      String string = ((ITestExtension) object).testGetExtension("Hello");
+      assertEquals("Hello World", string);
     }
+  }
 
-    /**
+  /**
      * 
      *  
      */
-    public void testGetClassLoader() {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            assertNotNull(descriptor.getClassLoader());
-        }
+  public void testGetClassLoader() {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      assertNotNull(descriptor.getClassLoader());
     }
+  }
 
-    /**
-     * @throws IOException
-     */
-    public void testGetResources() throws IOException {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
-                continue;
-            }
-            String value = descriptor.getResourceString("key", Locale.UK);
-            assertEquals("value", value);
-            value = descriptor.getResourceString("key",
-                    Locale.TRADITIONAL_CHINESE);
-            assertEquals("value", value);
+  /**
+   * @throws IOException
+   */
+  public void testGetResources() throws IOException {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+        continue;
+      }
+      String value = descriptor.getResourceString("key", Locale.UK);
+      assertEquals("value", value);
+      value = descriptor.getResourceString("key", Locale.TRADITIONAL_CHINESE);
+      assertEquals("value", value);
 
-        }
     }
+  }
 
-    /**
-     * @return a PluginFolderPath
-     */
-    private String getPluginFolder() {
-        String[] strings = conf.getStrings("plugin.folders");
-        if (strings == null || strings.length == 0)
-            fail("no plugin directory setuped..");
+  /**
+   * @return a PluginFolderPath
+   */
+  private String getPluginFolder() {
+    String[] strings = conf.getStrings("plugin.folders");
+    if (strings == null || strings.length == 0)
+      fail("no plugin directory set up");
 
-        String name = strings[0];
-        return new PluginManifestParser(conf, this.repository).getPluginFolder(name).toString();
-    }
+    String name = strings[0];
+    return new PluginManifestParser(conf, this.repository)
+        .getPluginFolder(name).toString();
+  }
 
-    /**
-     * Creates some Dummy Plugins
-     * 
-     * @param pCount
-     */
-    private void createDummyPlugins(int pCount) {
-        String string = getPluginFolder();
-        try {
-            File folder = new File(string);
-            folder.mkdir();
-            for (int i = 0; i < pCount; i++) {
-                String pluginFolder = string + File.separator + "DummyPlugin"
-                        + i;
-                File file = new File(pluginFolder);
-                file.mkdir();
-                fFolders.add(file);
-                createPluginManifest(i, file.getAbsolutePath());
-                createResourceFile(file.getAbsolutePath());
-            }
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
+  /**
+   * Creates some Dummy Plugins
+   * 
+   * @param pCount
+   */
+  private void createDummyPlugins(int pCount) {
+    String string = getPluginFolder();
+    try {
+      File folder = new File(string);
+      folder.mkdir();
+      for (int i = 0; i < pCount; i++) {
+        String pluginFolder = string + File.separator + "DummyPlugin" + i;
+        File file = new File(pluginFolder);
+        file.mkdir();
+        fFolders.add(file);
+        createPluginManifest(i, file.getAbsolutePath());
+        createResourceFile(file.getAbsolutePath());
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
     }
+  }
 
-    /**
-     * Creates an ResourceFile
-     * 
-     * @param pFolderPath
-     * @throws FileNotFoundException
-     * @throws IOException
-     */
-    private void createResourceFile(String pFolderPath)
-            throws FileNotFoundException, IOException {
-        Properties properties = new Properties();
-        properties.setProperty("key", "value");
-        properties.store(new FileOutputStream(pFolderPath + File.separator
-                + "messages" + ".properties"), "");
-    }
+  /**
+   * Creates a ResourceFile
+   * 
+   * @param pFolderPath
+   * @throws FileNotFoundException
+   * @throws IOException
+   */
+  private void createResourceFile(String pFolderPath)
+      throws FileNotFoundException, IOException {
+    Properties properties = new Properties();
+    properties.setProperty("key", "value");
+    properties.store(new FileOutputStream(pFolderPath + File.separator
+        + "messages" + ".properties"), "");
+  }
 
-    /**
-     * Deletes files in path
-     * 
-     * @param path
-     * @throws IOException
-     */
-    private void delete(File path) throws IOException {
-        File[] files = path.listFiles();
-        for (int i = 0; i < files.length; ++i) {
-            if (files[i].isDirectory())
-                delete(files[i]);
-            files[i].delete();
-        }
+  /**
+   * Deletes files in path
+   * 
+   * @param path
+   * @throws IOException
+   */
+  private void delete(File path) throws IOException {
+    File[] files = path.listFiles();
+    for (int i = 0; i < files.length; ++i) {
+      if (files[i].isDirectory())
+        delete(files[i]);
+      files[i].delete();
     }
+  }
 
-    /**
-     * Creates an Plugin Manifest File
-     * 
-     * @param i
-     * @param pFolderPath
-     * @throws IOException
-     */
-    private void createPluginManifest(int i, String pFolderPath)
-            throws IOException {
-        FileWriter out = new FileWriter(pFolderPath + File.separator
-                + "plugin.xml");
-        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" 
-                + "<!--this is just a simple plugin for testing issues.-->"
-                + "<plugin id=\"org.apache.nutch.plugin."
-                + i
-                + "\" name=\""
-                + i
-                + "\" version=\"1.0\" provider-name=\"joa23\" "
-                + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
-                + "<extension-point id=\"aExtensioID\" "
-                + "name=\"simple Parser Extension\" "
-                + "schema=\"schema/testExtensionPoint.exsd\"/>"
-                + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
-                + "<library name=\"libs/not_exported.jar\"/></runtime>"
-                + "<extension point=\"aExtensioID\">"
-                + "<implementation name=\"simple Parser Extension\" "
-                + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
-                + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
-                + "</implementation></extension></plugin>";
-        out.write(xml);
-        out.flush();
-        out.close();
-    }
+  /**
+   * Creates a Plugin Manifest File
+   * 
+   * @param i
+   * @param pFolderPath
+   * @throws IOException
+   */
+  private void createPluginManifest(int i, String pFolderPath)
+      throws IOException {
+    FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml");
+    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+        + "<!--this is just a simple plugin for testing issues.-->"
+        + "<plugin id=\"org.apache.nutch.plugin."
+        + i
+        + "\" name=\""
+        + i
+        + "\" version=\"1.0\" provider-name=\"joa23\" "
+        + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
+        + "<extension-point id=\"aExtensioID\" "
+        + "name=\"simple Parser Extension\" "
+        + "schema=\"schema/testExtensionPoint.exsd\"/>"
+        + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
+        + "<library name=\"libs/not_exported.jar\"/></runtime>"
+        + "<extension point=\"aExtensioID\">"
+        + "<implementation name=\"simple Parser Extension\" "
+        + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
+        + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
+        + "</implementation></extension></plugin>";
+    out.write(xml);
+    out.flush();
+    out.close();
+  }
 
-    private String getParameterValue() {
-        return "a simple param value";
-    }
+  private String getParameterValue() {
+    return "a simple param value";
+  }
 
-    private static String getGetExtensionId() {
-        return "aExtensioID";
-    }
+  private static String getGetExtensionId() {
+    return "aExtensioID";
+  }
 
-    private static String getGetConfigElementName() {
-        return "dummy-name";
-    }
+  private static String getGetConfigElementName() {
+    return "dummy-name";
+  }
 
-    public static void main(String[] args) throws IOException {
-        new TestPluginSystem().createPluginManifest(1, "/");
-    }
+  public static void main(String[] args) throws IOException {
+    new TestPluginSystem().createPluginManifest(1, "/");
+  }
 }
Index: src/test/org/apache/nutch/plugin/ITestExtension.java
===================================================================
--- src/test/org/apache/nutch/plugin/ITestExtension.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/ITestExtension.java	(working copy)
@@ -15,11 +15,12 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * A Simple Test Extension Interface.
  * 
  * @author joa23
- *
+ * 
  */
 public interface ITestExtension {
   public String testGetExtension(String hello);
Index: src/test/org/apache/nutch/plugin/HelloWorldExtension.java
===================================================================
--- src/test/org/apache/nutch/plugin/HelloWorldExtension.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/HelloWorldExtension.java	(working copy)
@@ -24,8 +24,11 @@
  */
 public class HelloWorldExtension implements ITestExtension {
 
-  /* (non-Javadoc)
-   * @see org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
+  /*
+   * (non-Javadoc)
+   * 
+   * @see
+   * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
    */
   public String testGetExtension(String hello) {
     return hello + " World";
Index: src/test/org/apache/nutch/plugin/SimpleTestPlugin.java
===================================================================
--- src/test/org/apache/nutch/plugin/SimpleTestPlugin.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/SimpleTestPlugin.java	(working copy)
@@ -28,8 +28,8 @@
 public class SimpleTestPlugin extends Plugin {
 
   /**
-   * @param pDescriptor 
-   * @param conf 
+   * @param pDescriptor
+   * @param conf
    */
   public SimpleTestPlugin(PluginDescriptor pDescriptor, Configuration conf) {
 
@@ -55,4 +55,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/fetcher/OldFetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/OldFetcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/OldFetcher.java	(working copy)
@@ -43,29 +43,29 @@
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.*;
 
-
 /** The fetcher. Most of the work is done by plugins. */
-public class OldFetcher extends Configured implements Tool, MapRunnable<WritableComparable, Writable, Text, NutchWritable> { 
+public class OldFetcher extends Configured implements Tool,
+    MapRunnable<WritableComparable, Writable, Text, NutchWritable> {
 
   public static final Logger LOG = LoggerFactory.getLogger(OldFetcher.class);
-  
+
   public static final int PERM_REFRESH_TIME = 5;
 
   public static final String CONTENT_REDIR = "content";
 
   public static final String PROTOCOL_REDIR = "protocol";
 
-  public static class InputFormat extends SequenceFileInputFormat<WritableComparable, Writable> {
+  public static class InputFormat extends
+      SequenceFileInputFormat<WritableComparable, Writable> {
     /** Don't split inputs, to keep things polite. */
-    public InputSplit[] getSplits(JobConf job, int nSplits)
-      throws IOException {
+    public InputSplit[] getSplits(JobConf job, int nSplits) throws IOException {
       FileStatus[] files = listStatus(job);
       FileSystem fs = FileSystem.get(job);
       InputSplit[] splits = new InputSplit[files.length];
       for (int i = 0; i < files.length; i++) {
         FileStatus cur = files[i];
-        splits[i] = new FileSplit(cur.getPath(), 0,
-            cur.getLen(), (String[])null);
+        splits[i] = new FileSplit(cur.getPath(), 0, cur.getLen(),
+            (String[]) null);
       }
       return splits;
     }
@@ -82,9 +82,9 @@
   private long start = System.currentTimeMillis(); // start time of fetcher run
   private long lastRequestStart = start;
 
-  private long bytes;                             // total bytes fetched
-  private int pages;                              // total pages fetched
-  private int errors;                             // total pages errored
+  private long bytes; // total bytes fetched
+  private int pages; // total pages fetched
+  private int errors; // total pages errored
 
   private boolean storingContent;
   private boolean parsing;
@@ -101,8 +101,8 @@
     private String reprUrl;
 
     public FetcherThread(Configuration conf) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread");              // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread"); // use an informative name
       this.conf = conf;
       this.urlFilters = new URLFilters(conf);
       this.scfilters = new ScoringFilters(conf);
@@ -112,27 +112,29 @@
     }
 
     public void run() {
-      synchronized (OldFetcher.this) {activeThreads++;} // count threads
-      
+      synchronized (OldFetcher.this) {
+        activeThreads++;
+      } // count threads
+
       try {
         Text key = new Text();
         CrawlDatum datum = new CrawlDatum();
-        
+
         while (true) {
           // TODO : NUTCH-258 ...
           // If something bad happened, then exit
           // if (conf.getBoolean("fetcher.exit", false)) {
-          //   break;
+          // break;
           // ]
-          
-          try {                                   // get next entry from input
+
+          try { // get next entry from input
             if (!input.next(key, datum)) {
-              break;                              // at eof, exit
+              break; // at eof, exit
             }
           } catch (IOException e) {
             if (LOG.isErrorEnabled()) {
               e.printStackTrace(LogUtil.getErrorStream(LOG));
-              LOG.error("fetcher caught:"+e.toString());
+              LOG.error("fetcher caught:" + e.toString());
             }
             break;
           }
@@ -144,8 +146,8 @@
           // url may be changed through redirects.
           Text url = new Text(key);
 
-          Text reprUrlWritable =
-            (Text) datum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
+          Text reprUrlWritable = (Text) datum.getMetaData().get(
+              Nutch.WRITABLE_REPR_URL_KEY);
           if (reprUrlWritable == null) {
             reprUrl = key.toString();
           } else {
@@ -153,7 +155,9 @@
           }
 
           try {
-            if (LOG.isInfoEnabled()) { LOG.info("fetching " + url); }
+            if (LOG.isInfoEnabled()) {
+              LOG.info("fetching " + url);
+            }
 
             // fetch the page
             redirectCount = 0;
@@ -162,7 +166,8 @@
                 LOG.debug("redirectCount=" + redirectCount);
               }
               redirecting = false;
-              Protocol protocol = this.protocolFactory.getProtocol(url.toString());
+              Protocol protocol = this.protocolFactory.getProtocol(url
+                  .toString());
               ProtocolOutput output = protocol.getProtocolOutput(url, datum);
               ProtocolStatus status = output.getStatus();
               Content content = output.getContent();
@@ -174,22 +179,22 @@
                     new Text(reprUrl));
               }
 
-              switch(status.getCode()) {
+              switch (status.getCode()) {
 
-              case ProtocolStatus.SUCCESS:        // got a page
-                pstatus = output(url, datum, content, status, CrawlDatum.STATUS_FETCH_SUCCESS);
+              case ProtocolStatus.SUCCESS: // got a page
+                pstatus = output(url, datum, content, status,
+                    CrawlDatum.STATUS_FETCH_SUCCESS);
                 updateStatus(content.getContent().length);
-                if (pstatus != null && pstatus.isSuccess() &&
-                        pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+                if (pstatus != null && pstatus.isSuccess()
+                    && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
                   String newUrl = pstatus.getMessage();
                   int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
                   url = handleRedirect(url, datum, urlString, newUrl,
-                                       refreshTime < PERM_REFRESH_TIME,
-                                       CONTENT_REDIR);
+                      refreshTime < PERM_REFRESH_TIME, CONTENT_REDIR);
                 }
                 break;
 
-              case ProtocolStatus.MOVED:         // redirect
+              case ProtocolStatus.MOVED: // redirect
               case ProtocolStatus.TEMP_MOVED:
                 int code;
                 boolean temp;
@@ -202,22 +207,22 @@
                 }
                 output(url, datum, content, status, code);
                 String newUrl = status.getMessage();
-                url = handleRedirect(url, datum, urlString, newUrl,
-                                     temp, PROTOCOL_REDIR);
+                url = handleRedirect(url, datum, urlString, newUrl, temp,
+                    PROTOCOL_REDIR);
                 break;
 
               // failures - increase the retry counter
               case ProtocolStatus.EXCEPTION:
                 logError(url, status.getMessage());
-              /* FALLTHROUGH */
-              case ProtocolStatus.RETRY:          // retry
+                /* FALLTHROUGH */
+              case ProtocolStatus.RETRY: // retry
               case ProtocolStatus.WOULDBLOCK:
               case ProtocolStatus.BLOCKED:
                 output(url, datum, null, status, CrawlDatum.STATUS_FETCH_RETRY);
                 break;
-                
+
               // permanent failures
-              case ProtocolStatus.GONE:           // gone
+              case ProtocolStatus.GONE: // gone
               case ProtocolStatus.NOTFOUND:
               case ProtocolStatus.ACCESS_DENIED:
               case ProtocolStatus.ROBOTS_DENIED:
@@ -225,9 +230,10 @@
                 break;
 
               case ProtocolStatus.NOTMODIFIED:
-                output(url, datum, null, status, CrawlDatum.STATUS_FETCH_NOTMODIFIED);
+                output(url, datum, null, status,
+                    CrawlDatum.STATUS_FETCH_NOTMODIFIED);
                 break;
-                
+
               default:
                 if (LOG.isWarnEnabled()) {
                   LOG.warn("Unknown ProtocolStatus: " + status.getCode());
@@ -244,28 +250,28 @@
 
             } while (redirecting && (redirectCount < maxRedirect));
 
-            
-          } catch (Throwable t) {                 // unexpected exception
+          } catch (Throwable t) { // unexpected exception
             logError(url, t.toString());
             output(url, datum, null, null, CrawlDatum.STATUS_FETCH_RETRY);
-            
+
           }
         }
 
       } catch (Throwable e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       } finally {
-        synchronized (OldFetcher.this) {activeThreads--;} // count threads
+        synchronized (OldFetcher.this) {
+          activeThreads--;
+        } // count threads
       }
     }
 
-    private Text handleRedirect(Text url, CrawlDatum datum,
-                                String urlString, String newUrl,
-                                boolean temp, String redirType)
-    throws MalformedURLException, URLFilterException {
+    private Text handleRedirect(Text url, CrawlDatum datum, String urlString,
+        String newUrl, boolean temp, String redirType)
+        throws MalformedURLException, URLFilterException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
       if (newUrl != null && !newUrl.equals(urlString)) {
@@ -275,8 +281,8 @@
           redirecting = true;
           redirectCount++;
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                      url + " (fetching now)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching now)");
           }
           return url;
         } else {
@@ -287,15 +293,15 @@
           }
           output(url, newDatum, null, null, CrawlDatum.STATUS_LINKED);
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                      url + " (fetching later)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching later)");
           }
           return null;
         }
       } else {
         if (LOG.isDebugEnabled()) {
-          LOG.debug(" - " + redirType + " redirect skipped: " +
-              (newUrl != null ? "to same url" : "filtered"));
+          LOG.debug(" - " + redirType + " redirect skipped: "
+              + (newUrl != null ? "to same url" : "filtered"));
         }
         return null;
       }
@@ -305,17 +311,18 @@
       if (LOG.isInfoEnabled()) {
         LOG.info("fetch of " + url + " failed with: " + message);
       }
-      synchronized (OldFetcher.this) {               // record failure
+      synchronized (OldFetcher.this) { // record failure
         errors++;
       }
     }
 
-    private ParseStatus output(Text key, CrawlDatum datum,
-                        Content content, ProtocolStatus pstatus, int status) {
+    private ParseStatus output(Text key, CrawlDatum datum, Content content,
+        ProtocolStatus pstatus, int status) {
 
       datum.setStatus(status);
       datum.setFetchTime(System.currentTimeMillis());
-      if (pstatus != null) datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
+      if (pstatus != null)
+        datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
 
       ParseResult parseResult = null;
       if (content != null) {
@@ -331,27 +338,31 @@
             LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
           }
         }
-        /* Note: Fetcher will only follow meta-redirects coming from the
-         * original URL. */ 
+        /*
+         * Note: Fetcher will only follow meta-redirects coming from the
+         * original URL.
+         */
         if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
           try {
             parseResult = this.parseUtil.parse(content);
           } catch (Exception e) {
-            LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException(e));
+            LOG.warn("Error parsing: " + key + ": "
+                + StringUtils.stringifyException(e));
           }
 
           if (parseResult == null) {
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, 
-                  new ParseStatus().getEmptyParse(conf));
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, new ParseStatus().getEmptyParse(conf));
             datum.setSignature(signature);
           }
         }
-        
-        /* Store status code in content So we can read this value during 
-         * parsing (as a separate job) and decide to parse or not.
+
+        /*
+         * Store status code in content so we can read this value during parsing
+         * (as a separate job) and decide to parse or not.
          */
-        content.getMetadata().add(Nutch.FETCH_STATUS_KEY, Integer.toString(status));
+        content.getMetadata().add(Nutch.FETCH_STATUS_KEY,
+            Integer.toString(status));
       }
 
       try {
@@ -363,7 +374,7 @@
             Text url = entry.getKey();
             Parse parse = entry.getValue();
             ParseStatus parseStatus = parse.getData().getStatus();
-            
+
             if (!parseStatus.isSuccess()) {
               LOG.warn("Error parsing: " + key + ": " + parseStatus);
               parse = parseStatus.getEmptyParse(getConf());
@@ -371,16 +382,16 @@
 
             // Calculate page signature. For non-parsing fetchers this will
             // be done in ParseSegment
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, parse);
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, parse);
             // Ensure segment name and score are in parseData metadata
-            parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, 
-                segmentName);
-            parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, 
-                StringUtil.toHexString(signature));
+            parse.getData().getContentMeta()
+                .set(Nutch.SEGMENT_NAME_KEY, segmentName);
+            parse.getData().getContentMeta()
+                .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
             // Pass fetch time to content meta
-            parse.getData().getContentMeta().set(Nutch.FETCH_TIME_KEY,
-                Long.toString(datum.getFetchTime()));
+            parse.getData().getContentMeta()
+                .set(Nutch.FETCH_TIME_KEY, Long.toString(datum.getFetchTime()));
             if (url.equals(key))
               datum.setSignature(signature);
             try {
@@ -391,15 +402,14 @@
                 LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
               }
             }
-            output.collect(url, new NutchWritable(
-                    new ParseImpl(new ParseText(parse.getText()), 
-                                  parse.getData(), parse.isCanonical())));
+            output.collect(url, new NutchWritable(new ParseImpl(new ParseText(
+                parse.getText()), parse.getData(), parse.isCanonical())));
           }
         }
       } catch (IOException e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       }
 
@@ -409,10 +419,10 @@
         if (p != null) {
           return p.getData().getStatus();
         }
-      } 
+      }
       return null;
     }
-    
+
   }
 
   private synchronized void updateStatus(int bytesInPage) throws IOException {
@@ -423,23 +433,22 @@
   private void reportStatus() throws IOException {
     String status;
     synchronized (this) {
-      long elapsed = (System.currentTimeMillis() - start)/1000;
-      status = 
-        pages+" pages, "+errors+" errors, "
-        + Math.round(((float)pages*10)/elapsed)/10.0+" pages/s, "
-        + Math.round(((((float)bytes)*8)/1024)/elapsed)+" kb/s, ";
+      long elapsed = (System.currentTimeMillis() - start) / 1000;
+      status = pages + " pages, " + errors + " errors, "
+          + Math.round(((float) pages * 10) / elapsed) / 10.0 + " pages/s, "
+          + Math.round(((((float) bytes) * 8) / 1024) / elapsed) + " kb/s, ";
     }
     reporter.setStatus(status);
   }
 
   public OldFetcher() {
-    
+
   }
-  
+
   public OldFetcher(Configuration conf) {
     setConf(conf);
   }
-  
+
   public void configure(JobConf job) {
     setConf(job);
 
@@ -447,12 +456,13 @@
     this.storingContent = isStoringContent(job);
     this.parsing = isParsing(job);
 
-//    if (job.getBoolean("fetcher.verbose", false)) {
-//      LOG.setLevel(Level.FINE);
-//    }
+    // if (job.getBoolean("fetcher.verbose", false)) {
+    // LOG.setLevel(Level.FINE);
+    // }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public static boolean isParsing(Configuration conf) {
     return conf.getBoolean("fetcher.parse", true);
@@ -462,29 +472,33 @@
     return conf.getBoolean("fetcher.store.content", true);
   }
 
-  public void run(RecordReader<WritableComparable, Writable> input, OutputCollector<Text, NutchWritable> output,
-                  Reporter reporter) throws IOException {
+  public void run(RecordReader<WritableComparable, Writable> input,
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
 
     this.input = input;
     this.output = output;
     this.reporter = reporter;
 
     this.maxRedirect = getConf().getInt("http.redirect.max", 3);
-    
+
     int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
-    if (LOG.isInfoEnabled()) { LOG.info("OldFetcher: threads: " + threadCount); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("OldFetcher: threads: " + threadCount);
+    }
 
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       new FetcherThread(getConf()).start();
     }
 
     // select a timeout that avoids a task timeout
-    long timeout = getConf().getInt("mapred.task.timeout", 10*60*1000)/2;
+    long timeout = getConf().getInt("mapred.task.timeout", 10 * 60 * 1000) / 2;
 
-    do {                                          // wait for threads to exit
+    do { // wait for threads to exit
       try {
         Thread.sleep(1000);
-      } catch (InterruptedException e) {}
+      } catch (InterruptedException e) {
+      }
 
       reportStatus();
 
@@ -492,18 +506,17 @@
       synchronized (this) {
         if ((System.currentTimeMillis() - lastRequestStart) > timeout) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Aborting with "+activeThreads+" hung threads.");
+            LOG.warn("Aborting with " + activeThreads + " hung threads.");
           }
           return;
         }
       }
 
     } while (activeThreads > 0);
-    
+
   }
 
-  public void fetch(Path segment, int threads)
-    throws IOException {
+  public void fetch(Path segment, int threads) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -521,7 +534,8 @@
     // for politeness, don't permit parallel execution of a single task
     job.setSpeculativeExecution(false);
 
-    FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+    FileInputFormat.addInputPath(job, new Path(segment,
+        CrawlDatum.GENERATE_DIR_NAME));
     job.setInputFormat(InputFormat.class);
 
     job.setMapRunnerClass(OldFetcher.class);
@@ -533,16 +547,17 @@
 
     JobClient.runJob(job);
     long end = System.currentTimeMillis();
-    LOG.info("OldFetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("OldFetcher: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-
   /** Run the fetcher. */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new OldFetcher(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new OldFetcher(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
 
     String usage = "Usage: OldFetcher <segment> [-threads n] [-noParsing]";
@@ -551,15 +566,16 @@
       System.err.println(usage);
       return -1;
     }
-      
+
     Path segment = new Path(args[0]);
     int threads = getConf().getInt("fetcher.threads.fetch", 10);
     boolean parsing = true;
 
-    for (int i = 1; i < args.length; i++) {       // parse command line
-      if (args[i].equals("-threads")) {           // found -threads option
-        threads =  Integer.parseInt(args[++i]);
-      } else if (args[i].equals("-noParsing")) parsing = false;
+    for (int i = 1; i < args.length; i++) { // parse command line
+      if (args[i].equals("-threads")) { // found -threads option
+        threads = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-noParsing"))
+        parsing = false;
     }
 
     getConf().setInt("fetcher.threads.fetch", threads);
@@ -567,7 +583,7 @@
       getConf().setBoolean("fetcher.parse", parsing);
     }
     try {
-      fetch(segment, threads);              // run the Fetcher
+      fetch(segment, threads); // run the Fetcher
       return 0;
     } catch (Exception e) {
       LOG.error("OldFetcher: " + StringUtils.stringifyException(e));
Index: src/java/org/apache/nutch/fetcher/Fetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/Fetcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/Fetcher.java	(working copy)
@@ -51,44 +51,46 @@
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.*;
 
-
-/** 
+/**
  * A queue-based fetcher.
  * 
- * <p>This fetcher uses a well-known model of one producer (a QueueFeeder)
- * and many consumers (FetcherThread-s).
+ * <p>
+ * This fetcher uses a well-known model of one producer (a QueueFeeder) and many
+ * consumers (FetcherThread-s).
  * 
- * <p>QueueFeeder reads input fetchlists and
- * populates a set of FetchItemQueue-s, which hold FetchItem-s that
- * describe the items to be fetched. There are as many queues as there are unique
- * hosts, but at any given time the total number of fetch items in all queues
- * is less than a fixed number (currently set to a multiple of the number of
- * threads).
+ * <p>
+ * QueueFeeder reads input fetchlists and populates a set of FetchItemQueue-s,
+ * which hold FetchItem-s that describe the items to be fetched. There are as
+ * many queues as there are unique hosts, but at any given time the total number
+ * of fetch items in all queues is less than a fixed number (currently set to a
+ * multiple of the number of threads).
  * 
- * <p>As items are consumed from the queues, the QueueFeeder continues to add new
+ * <p>
+ * As items are consumed from the queues, the QueueFeeder continues to add new
  * input items, so that their total count stays fixed (FetcherThread-s may also
  * add new items to the queues e.g. as a results of redirection) - until all
  * input items are exhausted, at which point the number of items in the queues
  * begins to decrease. When this number reaches 0 fetcher will finish.
  * 
- * <p>This fetcher implementation handles per-host blocking itself, instead
- * of delegating this work to protocol-specific plugins.
- * Each per-host queue handles its own "politeness" settings, such as the
- * maximum number of concurrent requests and crawl delay between consecutive
- * requests - and also a list of requests in progress, and the time the last
- * request was finished. As FetcherThread-s ask for new items to be fetched,
- * queues may return eligible items or null if for "politeness" reasons this
- * host's queue is not yet ready.
+ * <p>
+ * This fetcher implementation handles per-host blocking itself, instead of
+ * delegating this work to protocol-specific plugins. Each per-host queue
+ * handles its own "politeness" settings, such as the maximum number of
+ * concurrent requests and crawl delay between consecutive requests - and also a
+ * list of requests in progress, and the time the last request was finished. As
+ * FetcherThread-s ask for new items to be fetched, queues may return eligible
+ * items or null if for "politeness" reasons this host's queue is not yet ready.
  * 
- * <p>If there are still unfetched items in the queues, but none of the items
- * are ready, FetcherThread-s will spin-wait until either some items become
+ * <p>
+ * If there are still unfetched items in the queues, but none of the items are
+ * ready, FetcherThread-s will spin-wait until either some items become
  * available, or a timeout is reached (at which point the Fetcher will abort,
  * assuming the task is hung).
  * 
  * @author Andrzej Bialecki
  */
 public class Fetcher extends Configured implements Tool,
-    MapRunnable<Text, CrawlDatum, Text, NutchWritable> { 
+    MapRunnable<Text, CrawlDatum, Text, NutchWritable> {
 
   public static final int PERM_REFRESH_TIME = 5;
 
@@ -97,17 +99,17 @@
   public static final String PROTOCOL_REDIR = "protocol";
 
   public static final Logger LOG = LoggerFactory.getLogger(Fetcher.class);
-  
-  public static class InputFormat extends SequenceFileInputFormat<Text, CrawlDatum> {
+
+  public static class InputFormat extends
+      SequenceFileInputFormat<Text, CrawlDatum> {
     /** Don't split inputs, to keep things polite. */
-    public InputSplit[] getSplits(JobConf job, int nSplits)
-      throws IOException {
+    public InputSplit[] getSplits(JobConf job, int nSplits) throws IOException {
       FileStatus[] files = listStatus(job);
       FileSplit[] splits = new FileSplit[files.length];
       for (int i = 0; i < files.length; i++) {
         FileStatus cur = files[i];
-        splits[i] = new FileSplit(cur.getPath(), 0,
-            cur.getLen(), (String[])null);
+        splits[i] = new FileSplit(cur.getPath(), 0, cur.getLen(),
+            (String[]) null);
       }
       return splits;
     }
@@ -115,7 +117,7 @@
 
   private OutputCollector<Text, NutchWritable> output;
   private Reporter reporter;
-  
+
   private String segmentName;
   private AtomicInteger activeThreads = new AtomicInteger(0);
   private AtomicInteger spinWaiting = new AtomicInteger(0);
@@ -123,36 +125,37 @@
   private long start = System.currentTimeMillis(); // start time of fetcher run
   private AtomicLong lastRequestStart = new AtomicLong(start);
 
-  private AtomicLong bytes = new AtomicLong(0);        // total bytes fetched
-  private AtomicInteger pages = new AtomicInteger(0);  // total pages fetched
+  private AtomicLong bytes = new AtomicLong(0); // total bytes fetched
+  private AtomicInteger pages = new AtomicInteger(0); // total pages fetched
   private AtomicInteger errors = new AtomicInteger(0); // total pages errored
 
   private boolean storingContent;
   private boolean parsing;
   FetchItemQueues fetchQueues;
   QueueFeeder feeder;
-  
+
   /**
    * This class described the item to be fetched.
    */
-  private static class FetchItem {    
+  private static class FetchItem {
     String queueID;
     Text url;
     URL u;
     CrawlDatum datum;
-    
+
     public FetchItem(Text url, URL u, CrawlDatum datum, String queueID) {
       this.url = url;
       this.u = u;
       this.datum = datum;
       this.queueID = queueID;
     }
-    
-    /** Create an item. Queue id will be created based on <code>queueMode</code>
-     * argument, either as a protocol + hostname pair, protocol + IP
-     * address pair or protocol+domain pair.
+
+    /**
+     * Create an item. Queue id will be created based on <code>queueMode</code>
+     * argument, either as a protocol + hostname pair, protocol + IP address
+     * pair or protocol+domain pair.
      */
-    public static FetchItem create(Text url, CrawlDatum datum,  String queueMode) {
+    public static FetchItem create(Text url, CrawlDatum datum, String queueMode) {
       String queueID;
       URL u = null;
       try {
@@ -172,19 +175,18 @@
           LOG.warn("Unable to resolve: " + u.getHost() + ", skipping.");
           return null;
         }
-      }
-      else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)){
+      } else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) {
         key = URLUtil.getDomainName(u);
         if (key == null) {
-          LOG.warn("Unknown domain for url: " + url + ", using URL string as key");
-          key=u.toExternalForm();
+          LOG.warn("Unknown domain for url: " + url
+              + ", using URL string as key");
+          key = u.toExternalForm();
         }
-      }
-      else {
+      } else {
         key = u.getHost();
         if (key == null) {
           LOG.warn("Unknown host for url: " + url + ", using URL string as key");
-          key=u.toExternalForm();
+          key = u.toExternalForm();
         }
       }
       queueID = proto + "://" + key.toLowerCase();
@@ -202,28 +204,31 @@
     public Text getUrl() {
       return url;
     }
-    
+
     public URL getURL2() {
       return u;
     }
   }
-  
+
   /**
-   * This class handles FetchItems which come from the same host ID (be it
-   * a proto/hostname or proto/IP pair). It also keeps track of requests in
+   * This class handles FetchItems which come from the same host ID (be it a
+   * proto/hostname or proto/IP pair). It also keeps track of requests in
    * progress and elapsed time between requests.
    */
   private static class FetchItemQueue {
-    List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());
-    Set<FetchItem>  inProgress = Collections.synchronizedSet(new HashSet<FetchItem>());
+    List<FetchItem> queue = Collections
+        .synchronizedList(new LinkedList<FetchItem>());
+    Set<FetchItem> inProgress = Collections
+        .synchronizedSet(new HashSet<FetchItem>());
     AtomicLong nextFetchTime = new AtomicLong();
     AtomicInteger exceptionCounter = new AtomicInteger();
     long crawlDelay;
     long minCrawlDelay;
     int maxThreads;
     Configuration conf;
-    
-    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay, long minCrawlDelay) {
+
+    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay,
+        long minCrawlDelay) {
       this.conf = conf;
       this.maxThreads = maxThreads;
       this.crawlDelay = crawlDelay;
@@ -231,57 +236,64 @@
       // ready to start
       setEndTime(System.currentTimeMillis() - crawlDelay);
     }
-    
+
     public synchronized int emptyQueue() {
       int presize = queue.size();
       queue.clear();
       return presize;
     }
-    
+
     public int getQueueSize() {
       return queue.size();
     }
-    
+
     public int getInProgressSize() {
       return inProgress.size();
     }
-    
+
     public int incrementExceptionCounter() {
       return exceptionCounter.incrementAndGet();
     }
-    
+
     public void finishFetchItem(FetchItem it, boolean asap) {
       if (it != null) {
         inProgress.remove(it);
         setEndTime(System.currentTimeMillis(), asap);
       }
     }
-    
+
     public void addFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       queue.add(it);
     }
-    
+
     public void addInProgressFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       inProgress.add(it);
     }
-    
+
     public FetchItem getFetchItem() {
-      if (inProgress.size() >= maxThreads) return null;
+      if (inProgress.size() >= maxThreads)
+        return null;
       long now = System.currentTimeMillis();
-      if (nextFetchTime.get() > now) return null;
+      if (nextFetchTime.get() > now)
+        return null;
       FetchItem it = null;
-      if (queue.size() == 0) return null;
+      if (queue.size() == 0)
+        return null;
       try {
         it = queue.remove(0);
         inProgress.add(it);
       } catch (Exception e) {
-        LOG.error("Cannot remove FetchItem from queue or cannot add it to inProgress queue", e);
+        LOG.error(
+            "Cannot remove FetchItem from queue or cannot add it to inProgress queue",
+            e);
       }
       return it;
     }
-    
+
     public synchronized void dump() {
       LOG.info("  maxThreads    = " + maxThreads);
       LOG.info("  inProgress    = " + inProgress.size());
@@ -294,19 +306,20 @@
         LOG.info("  " + i + ". " + it.url);
       }
     }
-    
+
     private void setEndTime(long endTime) {
       setEndTime(endTime, false);
     }
-    
+
     private void setEndTime(long endTime, boolean asap) {
       if (!asap)
-        nextFetchTime.set(endTime + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
+        nextFetchTime.set(endTime
+            + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
       else
         nextFetchTime.set(endTime);
     }
   }
-  
+
   /**
    * Convenience class - a collection of queues that keeps track of the total
    * number of items, and provides items eligible for fetching from any queue.
@@ -320,55 +333,60 @@
     long minCrawlDelay;
     long timelimit = -1;
     int maxExceptionsPerQueue = -1;
-    Configuration conf;  
+    Configuration conf;
 
     public static final String QUEUE_MODE_HOST = "byHost";
     public static final String QUEUE_MODE_DOMAIN = "byDomain";
     public static final String QUEUE_MODE_IP = "byIP";
-    
+
     String queueMode;
-    
+
     public FetchItemQueues(Configuration conf) {
       this.conf = conf;
       this.maxThreads = conf.getInt("fetcher.threads.per.queue", 1);
       queueMode = conf.get("fetcher.queue.mode", QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(QUEUE_MODE_IP) && !queueMode.equals(QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(QUEUE_MODE_IP)
+          && !queueMode.equals(QUEUE_MODE_DOMAIN)
           && !queueMode.equals(QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
-      
+      LOG.info("Using queue mode : " + queueMode);
+
       this.crawlDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
-      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay", 0.0f) * 1000);
+      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay",
+          0.0f) * 1000);
       this.timelimit = conf.getLong("fetcher.timelimit", -1);
-      this.maxExceptionsPerQueue = conf.getInt("fetcher.max.exceptions.per.queue", -1);
+      this.maxExceptionsPerQueue = conf.getInt(
+          "fetcher.max.exceptions.per.queue", -1);
     }
-    
+
     public int getTotalSize() {
       return totalSize.get();
     }
-    
+
     public int getQueueCount() {
       return queues.size();
     }
-    
+
     public void addFetchItem(Text url, CrawlDatum datum) {
       FetchItem it = FetchItem.create(url, datum, queueMode);
-      if (it != null) addFetchItem(it);
+      if (it != null)
+        addFetchItem(it);
     }
-    
+
     public synchronized void addFetchItem(FetchItem it) {
       FetchItemQueue fiq = getFetchItemQueue(it.queueID);
       fiq.addFetchItem(it);
       totalSize.incrementAndGet();
     }
-    
+
     public void finishFetchItem(FetchItem it) {
       finishFetchItem(it, false);
     }
-    
+
     public void finishFetchItem(FetchItem it, boolean asap) {
       FetchItemQueue fiq = queues.get(it.queueID);
       if (fiq == null) {
@@ -377,7 +395,7 @@
       }
       fiq.finishFetchItem(it, asap);
     }
-    
+
     public synchronized FetchItemQueue getFetchItemQueue(String id) {
       FetchItemQueue fiq = queues.get(id);
       if (fiq == null) {
@@ -387,10 +405,10 @@
       }
       return fiq;
     }
-    
+
     public synchronized FetchItem getFetchItem() {
-      Iterator<Map.Entry<String, FetchItemQueue>> it =
-        queues.entrySet().iterator();
+      Iterator<Map.Entry<String, FetchItemQueue>> it = queues.entrySet()
+          .iterator();
       while (it.hasNext()) {
         FetchItemQueue fiq = it.next().getValue();
         // reap empty queues
@@ -406,7 +424,7 @@
       }
       return null;
     }
-    
+
     // called only once the feeder has stopped
     public synchronized int checkTimelimit() {
       int count = 0;
@@ -418,7 +436,8 @@
         // there might also be a case where totalsize !=0 but number of queues
         // == 0
         // in which case we simply force it to 0 to avoid blocking
-        if (totalSize.get() != 0 && queues.size() == 0) totalSize.set(0);
+        if (totalSize.get() != 0 && queues.size() == 0)
+          totalSize.set(0);
       }
       return count;
     }
@@ -429,7 +448,8 @@
 
       for (String id : queues.keySet()) {
         FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id + " >> dropping! ");
         int deleted = fiq.emptyQueue();
         for (int i = 0; i < deleted; i++) {
@@ -440,11 +460,11 @@
 
       return count;
     }
-    
+
     /**
      * Increment the exception counter of a queue in case of an exception e.g.
      * timeout; when higher than a given threshold simply empty the queue.
-     *
+     * 
      * @param queueid
      * @return number of purged items
      */
@@ -457,7 +477,7 @@
         return 0;
       }
       int excCount = fiq.incrementExceptionCounter();
-      if (maxExceptionsPerQueue!= -1 && excCount >= maxExceptionsPerQueue) {
+      if (maxExceptionsPerQueue != -1 && excCount >= maxExceptionsPerQueue) {
         // too many exceptions for items in this queue - purge it
         int deleted = fiq.emptyQueue();
         LOG.info("* queue: " + queueid + " >> removed " + deleted
@@ -470,20 +490,20 @@
       return 0;
     }
 
-    
     public synchronized void dump() {
       for (String id : queues.keySet()) {
         FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id);
         fiq.dump();
       }
     }
   }
-  
+
   /**
-   * This class feeds the queues with input items, and re-fills them as
-   * items are consumed by FetcherThread-s.
+   * This class feeds the queues with input items, and re-fills them as items
+   * are consumed by FetcherThread-s.
    */
   private static class QueueFeeder extends Thread {
     private RecordReader<Text, CrawlDatum> reader;
@@ -499,7 +519,7 @@
       this.setDaemon(true);
       this.setName("QueueFeeder");
     }
-    
+
     public void setTimeLimit(long tl) {
       timelimit = tl;
     }
@@ -528,7 +548,9 @@
           // queues are full - spin-wait until they have some free space
           try {
             Thread.sleep(1000);
-          } catch (Exception e) {};
+          } catch (Exception e) {
+          }
+          ;
           continue;
         } else {
           LOG.debug("-feeding " + feed + " input urls ...");
@@ -549,11 +571,11 @@
           }
         }
       }
-      LOG.info("QueueFeeder finished: total " + cnt + " records + hit by time limit :"
-          + timelimitcount);
+      LOG.info("QueueFeeder finished: total " + cnt
+          + " records + hit by time limit :" + timelimitcount);
     }
   }
-  
+
   /**
    * This class picks items from queues and fetches the pages.
    */
@@ -573,8 +595,8 @@
     private boolean ignoreExternalLinks;
 
     public FetcherThread(Configuration conf) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread");              // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread"); // use an informative name
       this.conf = conf;
       this.urlFilters = new URLFilters(conf);
       this.scfilters = new ScoringFilters(conf);
@@ -582,25 +604,28 @@
       this.protocolFactory = new ProtocolFactory(conf);
       this.normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_FETCHER);
       this.maxCrawlDelay = conf.getInt("fetcher.max.crawl.delay", 30) * 1000;
-      queueMode = conf.get("fetcher.queue.mode", FetchItemQueues.QUEUE_MODE_HOST);
+      queueMode = conf.get("fetcher.queue.mode",
+          FetchItemQueues.QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(FetchItemQueues.QUEUE_MODE_IP) && !queueMode.equals(FetchItemQueues.QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(FetchItemQueues.QUEUE_MODE_IP)
+          && !queueMode.equals(FetchItemQueues.QUEUE_MODE_DOMAIN)
           && !queueMode.equals(FetchItemQueues.QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = FetchItemQueues.QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
+      LOG.info("Using queue mode : " + queueMode);
       this.maxRedirect = conf.getInt("http.redirect.max", 3);
-      this.ignoreExternalLinks = 
-        conf.getBoolean("db.ignore.external.links", false);
+      this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links",
+          false);
     }
 
     public void run() {
       activeThreads.incrementAndGet(); // count threads
-      
+
       FetchItem fit = null;
       try {
-        
+
         while (true) {
           fit = fetchQueues.getFetchItem();
           if (fit == null) {
@@ -610,8 +635,9 @@
               spinWaiting.incrementAndGet();
               try {
                 Thread.sleep(500);
-              } catch (Exception e) {}
-                spinWaiting.decrementAndGet();
+              } catch (Exception e) {
+              }
+              spinWaiting.decrementAndGet();
               continue;
             } else {
               // all done, finish this thread
@@ -619,15 +645,17 @@
             }
           }
           lastRequestStart.set(System.currentTimeMillis());
-          Text reprUrlWritable =
-            (Text) fit.datum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
+          Text reprUrlWritable = (Text) fit.datum.getMetaData().get(
+              Nutch.WRITABLE_REPR_URL_KEY);
           if (reprUrlWritable == null) {
             reprUrl = fit.url.toString();
           } else {
             reprUrl = reprUrlWritable.toString();
           }
           try {
-            if (LOG.isInfoEnabled()) { LOG.info("fetching " + fit.url); }
+            if (LOG.isInfoEnabled()) {
+              LOG.info("fetching " + fit.url);
+            }
 
             // fetch the page
             redirecting = false;
@@ -637,7 +665,8 @@
                 LOG.debug("redirectCount=" + redirectCount);
               }
               redirecting = false;
-              Protocol protocol = this.protocolFactory.getProtocol(fit.url.toString());
+              Protocol protocol = this.protocolFactory.getProtocol(fit.url
+                  .toString());
               RobotRules rules = protocol.getRobotRules(fit.url, fit.datum);
               if (!rules.isAllowed(fit.u)) {
                 // unblock
@@ -645,7 +674,9 @@
                 if (LOG.isDebugEnabled()) {
                   LOG.debug("Denied by robots.txt: " + fit.url);
                 }
-                output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
+                output(fit.url, fit.datum, null,
+                    ProtocolStatus.STATUS_ROBOTS_DENIED,
+                    CrawlDatum.STATUS_FETCH_GONE);
                 reporter.incrCounter("FetcherStatus", "robots_denied", 1);
                 continue;
               }
@@ -653,16 +684,22 @@
                 if (rules.getCrawlDelay() > maxCrawlDelay) {
                   // unblock
                   fetchQueues.finishFetchItem(fit, true);
-                  LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
-                  output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
-                  reporter.incrCounter("FetcherStatus", "robots_denied_maxcrawldelay", 1);
+                  LOG.debug("Crawl-Delay for " + fit.url + " too long ("
+                      + rules.getCrawlDelay() + "), skipping");
+                  output(fit.url, fit.datum, null,
+                      ProtocolStatus.STATUS_ROBOTS_DENIED,
+                      CrawlDatum.STATUS_FETCH_GONE);
+                  reporter.incrCounter("FetcherStatus",
+                      "robots_denied_maxcrawldelay", 1);
                   continue;
                 } else {
-                  FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
+                  FetchItemQueue fiq = fetchQueues
+                      .getFetchItemQueue(fit.queueID);
                   fiq.crawlDelay = rules.getCrawlDelay();
                 }
               }
-              ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.datum);
+              ProtocolOutput output = protocol.getProtocolOutput(fit.url,
+                  fit.datum);
               ProtocolStatus status = output.getStatus();
               Content content = output.getContent();
               ParseStatus pstatus = null;
@@ -672,28 +709,28 @@
               String urlString = fit.url.toString();
 
               reporter.incrCounter("FetcherStatus", status.getName(), 1);
-              
-              switch(status.getCode()) {
-                
+
+              switch (status.getCode()) {
+
               case ProtocolStatus.WOULDBLOCK:
                 // retry ?
                 fetchQueues.addFetchItem(fit);
                 break;
 
-              case ProtocolStatus.SUCCESS:        // got a page
-                pstatus = output(fit.url, fit.datum, content, status, CrawlDatum.STATUS_FETCH_SUCCESS);
+              case ProtocolStatus.SUCCESS: // got a page
+                pstatus = output(fit.url, fit.datum, content, status,
+                    CrawlDatum.STATUS_FETCH_SUCCESS);
                 updateStatus(content.getContent().length);
-                if (pstatus != null && pstatus.isSuccess() &&
-                        pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+                if (pstatus != null && pstatus.isSuccess()
+                    && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
                   String newUrl = pstatus.getMessage();
                   int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
-                  Text redirUrl =
-                    handleRedirect(fit.url, fit.datum,
-                                   urlString, newUrl,
-                                   refreshTime < Fetcher.PERM_REFRESH_TIME,
-                                   Fetcher.CONTENT_REDIR);
+                  Text redirUrl = handleRedirect(fit.url, fit.datum, urlString,
+                      newUrl, refreshTime < Fetcher.PERM_REFRESH_TIME,
+                      Fetcher.CONTENT_REDIR);
                   if (redirUrl != null) {
-                    CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
+                    CrawlDatum newDatum = new CrawlDatum(
+                        CrawlDatum.STATUS_DB_UNFETCHED,
                         fit.datum.getFetchInterval(), fit.datum.getScore());
                     // transfer existing metadata to the redir
                     newDatum.getMetaData().putAll(fit.datum.getMetaData());
@@ -704,19 +741,20 @@
                     }
                     fit = FetchItem.create(redirUrl, newDatum, queueMode);
                     if (fit != null) {
-                      FetchItemQueue fiq =
-                        fetchQueues.getFetchItemQueue(fit.queueID);
+                      FetchItemQueue fiq = fetchQueues
+                          .getFetchItemQueue(fit.queueID);
                       fiq.addInProgressFetchItem(fit);
                     } else {
                       // stop redirecting
                       redirecting = false;
-                      reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect", 1);
+                      reporter.incrCounter("FetcherStatus",
+                          "FetchItem.notCreated.redirect", 1);
                     }
                   }
                 }
                 break;
 
-              case ProtocolStatus.MOVED:         // redirect
+              case ProtocolStatus.MOVED: // redirect
               case ProtocolStatus.TEMP_MOVED:
                 int code;
                 boolean temp;
@@ -729,12 +767,11 @@
                 }
                 output(fit.url, fit.datum, content, status, code);
                 String newUrl = status.getMessage();
-                Text redirUrl =
-                  handleRedirect(fit.url, fit.datum,
-                                 urlString, newUrl, temp,
-                                 Fetcher.PROTOCOL_REDIR);
+                Text redirUrl = handleRedirect(fit.url, fit.datum, urlString,
+                    newUrl, temp, Fetcher.PROTOCOL_REDIR);
                 if (redirUrl != null) {
-                  CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
+                  CrawlDatum newDatum = new CrawlDatum(
+                      CrawlDatum.STATUS_DB_UNFETCHED,
                       fit.datum.getFetchInterval(), fit.datum.getScore());
                   // transfer existing metadata
                   newDatum.getMetaData().putAll(fit.datum.getMetaData());
@@ -745,13 +782,14 @@
                   }
                   fit = FetchItem.create(redirUrl, newDatum, queueMode);
                   if (fit != null) {
-                    FetchItemQueue fiq =
-                      fetchQueues.getFetchItemQueue(fit.queueID);
+                    FetchItemQueue fiq = fetchQueues
+                        .getFetchItemQueue(fit.queueID);
                     fiq.addInProgressFetchItem(fit);
                   } else {
                     // stop redirecting
                     redirecting = false;
-                    reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect", 1);
+                    reporter.incrCounter("FetcherStatus",
+                        "FetchItem.notCreated.redirect", 1);
                   }
                 } else {
                   // stop redirecting
@@ -761,31 +799,37 @@
 
               case ProtocolStatus.EXCEPTION:
                 logError(fit.url, status.getMessage());
-                int killedURLs = fetchQueues.checkExceptionThreshold(fit.getQueueID());
-                if (killedURLs!=0)
-                   reporter.incrCounter("FetcherStatus", "AboveExceptionThresholdInQueue", killedURLs);
+                int killedURLs = fetchQueues.checkExceptionThreshold(fit
+                    .getQueueID());
+                if (killedURLs != 0)
+                  reporter.incrCounter("FetcherStatus",
+                      "AboveExceptionThresholdInQueue", killedURLs);
                 /* FALLTHROUGH */
-              case ProtocolStatus.RETRY:          // retry
+              case ProtocolStatus.RETRY: // retry
               case ProtocolStatus.BLOCKED:
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_RETRY);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_RETRY);
                 break;
-                
-              case ProtocolStatus.GONE:           // gone
+
+              case ProtocolStatus.GONE: // gone
               case ProtocolStatus.NOTFOUND:
               case ProtocolStatus.ACCESS_DENIED:
               case ProtocolStatus.ROBOTS_DENIED:
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_GONE);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_GONE);
                 break;
 
               case ProtocolStatus.NOTMODIFIED:
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_NOTMODIFIED);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_NOTMODIFIED);
                 break;
 
               default:
                 if (LOG.isWarnEnabled()) {
                   LOG.warn("Unknown ProtocolStatus: " + status.getCode());
                 }
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_RETRY);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_RETRY);
               }
 
               if (redirecting && redirectCount > maxRedirect) {
@@ -793,53 +837,58 @@
                 if (LOG.isInfoEnabled()) {
                   LOG.info(" - redirect count exceeded " + fit.url);
                 }
-                output(fit.url, fit.datum, null, ProtocolStatus.STATUS_REDIR_EXCEEDED, CrawlDatum.STATUS_FETCH_GONE);
+                output(fit.url, fit.datum, null,
+                    ProtocolStatus.STATUS_REDIR_EXCEEDED,
+                    CrawlDatum.STATUS_FETCH_GONE);
               }
 
             } while (redirecting && (redirectCount <= maxRedirect));
-            
-          } catch (Throwable t) {                 // unexpected exception
+
+          } catch (Throwable t) { // unexpected exception
             // unblock
             fetchQueues.finishFetchItem(fit);
             logError(fit.url, t.toString());
-            output(fit.url, fit.datum, null, ProtocolStatus.STATUS_FAILED, CrawlDatum.STATUS_FETCH_RETRY);
+            output(fit.url, fit.datum, null, ProtocolStatus.STATUS_FAILED,
+                CrawlDatum.STATUS_FETCH_RETRY);
           }
         }
 
       } catch (Throwable e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       } finally {
-        if (fit != null) fetchQueues.finishFetchItem(fit);
+        if (fit != null)
+          fetchQueues.finishFetchItem(fit);
         activeThreads.decrementAndGet(); // count threads
-        LOG.info("-finishing thread " + getName() + ", activeThreads=" + activeThreads);
+        LOG.info("-finishing thread " + getName() + ", activeThreads="
+            + activeThreads);
       }
     }
 
-    private Text handleRedirect(Text url, CrawlDatum datum,
-                                String urlString, String newUrl,
-                                boolean temp, String redirType)
-    throws MalformedURLException, URLFilterException {
+    private Text handleRedirect(Text url, CrawlDatum datum, String urlString,
+        String newUrl, boolean temp, String redirType)
+        throws MalformedURLException, URLFilterException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
-      
+
       if (ignoreExternalLinks) {
         try {
           String origHost = new URL(urlString).getHost().toLowerCase();
           String newHost = new URL(newUrl).getHost().toLowerCase();
           if (!origHost.equals(newHost)) {
             if (LOG.isDebugEnabled()) {
-              LOG.debug(" - ignoring redirect " + redirType + " from " +
-                          urlString + " to " + newUrl +
-                          " because external links are ignored");
+              LOG.debug(" - ignoring redirect " + redirType + " from "
+                  + urlString + " to " + newUrl
+                  + " because external links are ignored");
             }
             return null;
           }
-        } catch (MalformedURLException e) { }
+        } catch (MalformedURLException e) {
+        }
       }
-      
+
       if (newUrl != null && !newUrl.equals(urlString)) {
         reprUrl = URLUtil.chooseRepr(reprUrl, newUrl, temp);
         url = new Text(newUrl);
@@ -847,14 +896,14 @@
           redirecting = true;
           redirectCount++;
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                url + " (fetching now)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching now)");
           }
           return url;
         } else {
           CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_LINKED,
-              datum.getFetchInterval(),datum.getScore());
-          // transfer existing metadata 
+              datum.getFetchInterval(), datum.getScore());
+          // transfer existing metadata
           newDatum.getMetaData().putAll(datum.getMetaData());
           try {
             scfilters.initialScore(url, newDatum);
@@ -867,15 +916,15 @@
           }
           output(url, newDatum, null, null, CrawlDatum.STATUS_LINKED);
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                url + " (fetching later)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching later)");
           }
           return null;
         }
       } else {
         if (LOG.isDebugEnabled()) {
-          LOG.debug(" - " + redirType + " redirect skipped: " +
-              (newUrl != null ? "to same url" : "filtered"));
+          LOG.debug(" - " + redirType + " redirect skipped: "
+              + (newUrl != null ? "to same url" : "filtered"));
         }
         return null;
       }
@@ -888,12 +937,13 @@
       errors.incrementAndGet();
     }
 
-    private ParseStatus output(Text key, CrawlDatum datum,
-                        Content content, ProtocolStatus pstatus, int status) {
+    private ParseStatus output(Text key, CrawlDatum datum, Content content,
+        ProtocolStatus pstatus, int status) {
 
       datum.setStatus(status);
       datum.setFetchTime(System.currentTimeMillis());
-      if (pstatus != null) datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
+      if (pstatus != null)
+        datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
 
       ParseResult parseResult = null;
       if (content != null) {
@@ -909,27 +959,31 @@
             LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
           }
         }
-        /* Note: Fetcher will only follow meta-redirects coming from the
-         * original URL. */ 
+        /*
+         * Note: Fetcher will only follow meta-redirects coming from the
+         * original URL.
+         */
         if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
           try {
             parseResult = this.parseUtil.parse(content);
           } catch (Exception e) {
-            LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException(e));
+            LOG.warn("Error parsing: " + key + ": "
+                + StringUtils.stringifyException(e));
           }
 
           if (parseResult == null) {
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, 
-                  new ParseStatus().getEmptyParse(conf));
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, new ParseStatus().getEmptyParse(conf));
             datum.setSignature(signature);
           }
         }
-        
-        /* Store status code in content So we can read this value during 
-         * parsing (as a separate job) and decide to parse or not.
+
+        /*
+         * Store status code in content so we can read this value during parsing
+         * (as a separate job) and decide to parse or not.
          */
-        content.getMetadata().add(Nutch.FETCH_STATUS_KEY, Integer.toString(status));
+        content.getMetadata().add(Nutch.FETCH_STATUS_KEY,
+            Integer.toString(status));
       }
 
       try {
@@ -941,7 +995,7 @@
             Text url = entry.getKey();
             Parse parse = entry.getValue();
             ParseStatus parseStatus = parse.getData().getStatus();
-            
+
             if (!parseStatus.isSuccess()) {
               LOG.warn("Error parsing: " + key + ": " + parseStatus);
               parse = parseStatus.getEmptyParse(getConf());
@@ -949,16 +1003,16 @@
 
             // Calculate page signature. For non-parsing fetchers this will
             // be done in ParseSegment
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, parse);
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, parse);
             // Ensure segment name and score are in parseData metadata
-            parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, 
-                segmentName);
-            parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, 
-                StringUtil.toHexString(signature));
+            parse.getData().getContentMeta()
+                .set(Nutch.SEGMENT_NAME_KEY, segmentName);
+            parse.getData().getContentMeta()
+                .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
             // Pass fetch time to content meta
-            parse.getData().getContentMeta().set(Nutch.FETCH_TIME_KEY,
-                Long.toString(datum.getFetchTime()));
+            parse.getData().getContentMeta()
+                .set(Nutch.FETCH_TIME_KEY, Long.toString(datum.getFetchTime()));
             if (url.equals(key))
               datum.setSignature(signature);
             try {
@@ -969,15 +1023,14 @@
                 LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
               }
             }
-            output.collect(url, new NutchWritable(
-                    new ParseImpl(new ParseText(parse.getText()), 
-                                  parse.getData(), parse.isCanonical())));
+            output.collect(url, new NutchWritable(new ParseImpl(new ParseText(
+                parse.getText()), parse.getData(), parse.isCanonical())));
           }
         }
       } catch (IOException e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       }
 
@@ -985,38 +1038,42 @@
       if (parseResult != null && !parseResult.isEmpty()) {
         Parse p = parseResult.get(content.getUrl());
         if (p != null) {
-          reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[p.getData().getStatus().getMajorCode()], 1);
+          reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[p
+              .getData().getStatus().getMajorCode()], 1);
           return p.getData().getStatus();
         }
       }
       return null;
     }
-    
+
   }
 
-  public Fetcher() { super(null); }
+  public Fetcher() {
+    super(null);
+  }
 
-  public Fetcher(Configuration conf) { super(conf); }
+  public Fetcher(Configuration conf) {
+    super(conf);
+  }
 
   private void updateStatus(int bytesInPage) throws IOException {
     pages.incrementAndGet();
     bytes.addAndGet(bytesInPage);
   }
 
-  
-  private void reportStatus(int pagesLastSec, int bytesLastSec) throws IOException {
+  private void reportStatus(int pagesLastSec, int bytesLastSec)
+      throws IOException {
     String status;
-    long elapsed = (System.currentTimeMillis() - start)/1000;
+    long elapsed = (System.currentTimeMillis() - start) / 1000;
 
-    float avgPagesSec = Math.round(((float)pages.get()*10)/elapsed)/10;
-    float avgBytesSec = Math.round(((((float)bytes.get())*8)/1000)/elapsed);
+    float avgPagesSec = Math.round(((float) pages.get() * 10) / elapsed) / 10;
+    float avgBytesSec = Math.round(((((float) bytes.get()) * 8) / 1000)
+        / elapsed);
 
-    status = activeThreads + " threads, " +
-     fetchQueues.getQueueCount() + " queues, "+
-     fetchQueues.getTotalSize() + " URLs queued, "+
-      pages+" pages, "+errors+" errors, "
-      + avgPagesSec + " (" + pagesLastSec + ") pages/s, "
-      + avgBytesSec + " (" + bytesLastSec + ") kbits/s, ";
+    status = activeThreads + " threads, " + fetchQueues.getQueueCount()
+        + " queues, " + fetchQueues.getTotalSize() + " URLs queued, " + pages
+        + " pages, " + errors + " errors, " + avgPagesSec + " (" + pagesLastSec
+        + ") pages/s, " + avgBytesSec + " (" + bytesLastSec + ") kbits/s, ";
 
     reporter.setStatus(status);
   }
@@ -1028,12 +1085,13 @@
     this.storingContent = isStoringContent(job);
     this.parsing = isParsing(job);
 
-//    if (job.getBoolean("fetcher.verbose", false)) {
-//      LOG.setLevel(Level.FINE);
-//    }
+    // if (job.getBoolean("fetcher.verbose", false)) {
+    // LOG.setLevel(Level.FINE);
+    // }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public static boolean isParsing(Configuration conf) {
     return conf.getBoolean("fetcher.parse", true);
@@ -1044,41 +1102,51 @@
   }
 
   public void run(RecordReader<Text, CrawlDatum> input,
-      OutputCollector<Text, NutchWritable> output,
-                  Reporter reporter) throws IOException {
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
 
     this.output = output;
     this.reporter = reporter;
     this.fetchQueues = new FetchItemQueues(getConf());
 
     int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: threads: " + threadCount); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: threads: " + threadCount);
+    }
 
     int timeoutDivisor = getConf().getInt("fetcher.threads.timeout.divisor", 2);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: time-out divisor: " + timeoutDivisor); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: time-out divisor: " + timeoutDivisor);
+    }
 
-    int queueDepthMuliplier =  getConf().getInt("fetcher.queue.depth.multiplier", 50);
-    
-    feeder = new QueueFeeder(input, fetchQueues, threadCount * queueDepthMuliplier);
-    //feeder.setPriority((Thread.MAX_PRIORITY + Thread.NORM_PRIORITY) / 2);
-    
-    // the value of the time limit is either -1 or the time where it should finish
+    int queueDepthMuliplier = getConf().getInt(
+        "fetcher.queue.depth.multiplier", 50);
+
+    feeder = new QueueFeeder(input, fetchQueues, threadCount
+        * queueDepthMuliplier);
+    // feeder.setPriority((Thread.MAX_PRIORITY + Thread.NORM_PRIORITY) / 2);
+
+    // the value of the time limit is either -1 or the time where it should
+    // finish
     long timelimit = getConf().getLong("fetcher.timelimit", -1);
-    if (timelimit != -1) feeder.setTimeLimit(timelimit);
+    if (timelimit != -1)
+      feeder.setTimeLimit(timelimit);
     feeder.start();
 
     // set non-blocking & no-robots mode for HTTP protocol plugins.
     getConf().setBoolean(Protocol.CHECK_BLOCKING, false);
     getConf().setBoolean(Protocol.CHECK_ROBOTS, false);
-    
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       new FetcherThread(getConf()).start();
     }
 
     // select a timeout that avoids a task timeout
-    long timeout = getConf().getInt("mapred.task.timeout", 10*60*1000)/timeoutDivisor;
+    long timeout = getConf().getInt("mapred.task.timeout", 10 * 60 * 1000)
+        / timeoutDivisor;
 
-    // Used for threshold check, holds pages and bytes processed in the last second
+    // Used for threshold check, holds pages and bytes processed in the last
+    // second
     int pagesLastSec;
     int bytesLastSec;
 
@@ -1086,26 +1154,35 @@
     boolean throughputThresholdExceeded = false;
     int throughputThresholdNumRetries = 0;
 
-    int throughputThresholdPages = getConf().getInt("fetcher.throughput.threshold.pages", -1);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages); }
-    int throughputThresholdMaxRetries = getConf().getInt("fetcher.throughput.threshold.retries", 5);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: throughput threshold retries: " + throughputThresholdMaxRetries); }
+    int throughputThresholdPages = getConf().getInt(
+        "fetcher.throughput.threshold.pages", -1);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages);
+    }
+    int throughputThresholdMaxRetries = getConf().getInt(
+        "fetcher.throughput.threshold.retries", 5);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold retries: "
+          + throughputThresholdMaxRetries);
+    }
 
-    do {                                          // wait for threads to exit
+    do { // wait for threads to exit
       pagesLastSec = pages.get();
-      bytesLastSec = (int)bytes.get();
+      bytesLastSec = (int) bytes.get();
 
       try {
         Thread.sleep(1000);
-      } catch (InterruptedException e) {}
+      } catch (InterruptedException e) {
+      }
 
       pagesLastSec = pages.get() - pagesLastSec;
-      bytesLastSec = (int)bytes.get() - bytesLastSec;
+      bytesLastSec = (int) bytes.get() - bytesLastSec;
 
       reportStatus(pagesLastSec, bytesLastSec);
 
-      LOG.info("-activeThreads=" + activeThreads + ", spinWaiting=" + spinWaiting.get()
-          + ", fetchQueues.totalSize=" + fetchQueues.getTotalSize());
+      LOG.info("-activeThreads=" + activeThreads + ", spinWaiting="
+          + spinWaiting.get() + ", fetchQueues.totalSize="
+          + fetchQueues.getTotalSize());
 
       if (!feeder.isAlive() && fetchQueues.getTotalSize() < 5) {
         fetchQueues.dump();
@@ -1113,16 +1190,23 @@
 
       // if throughput threshold is enabled
       if (!feeder.isAlive() && throughputThresholdPages != -1) {
-        // Have we reached the threshold of pages/second and threshold was not yet exceeded
-        if (pagesLastSec > throughputThresholdPages && !throughputThresholdExceeded) {
-          LOG.info("Exceding " + Integer.toString(throughputThresholdPages) + " pages/second");
+        // Have we reached the threshold of pages/second and threshold was not
+        // yet exceeded
+        if (pagesLastSec > throughputThresholdPages
+            && !throughputThresholdExceeded) {
+          LOG.info("Exceding " + Integer.toString(throughputThresholdPages)
+              + " pages/second");
           throughputThresholdExceeded = true;
         }
 
         // Check if we're dropping below the threshold
-        if (throughputThresholdExceeded && pagesLastSec < throughputThresholdPages) {
+        if (throughputThresholdExceeded
+            && pagesLastSec < throughputThresholdPages) {
           throughputThresholdNumRetries++;
-          LOG.warn(Integer.toString(throughputThresholdNumRetries) + ": dropping below configured threshold of " + Integer.toString(throughputThresholdPages) + " pages per second");
+          LOG.warn(Integer.toString(throughputThresholdNumRetries)
+              + ": dropping below configured threshold of "
+              + Integer.toString(throughputThresholdPages)
+              + " pages per second");
 
           // Quit if we dropped below threshold too many times
           if (throughputThresholdNumRetries == throughputThresholdMaxRetries) {
@@ -1131,11 +1215,13 @@
             // Disable the threshold checker
             throughputThresholdPages = -1;
 
-            // Empty the queues cleanly and get number of items that were dropped
+            // Empty the queues cleanly and get number of items that were
+            // dropped
             int hitByThrougputThreshold = fetchQueues.emptyQueues();
 
-            if (hitByThrougputThreshold != 0) reporter.incrCounter("FetcherStatus",
-              "hitByThrougputThreshold", hitByThrougputThreshold);
+            if (hitByThrougputThreshold != 0)
+              reporter.incrCounter("FetcherStatus", "hitByThrougputThreshold",
+                  hitByThrougputThreshold);
           }
         }
       }
@@ -1143,25 +1229,25 @@
       // check timelimit
       if (!feeder.isAlive()) {
         int hitByTimeLimit = fetchQueues.checkTimelimit();
-        if (hitByTimeLimit != 0) reporter.incrCounter("FetcherStatus",
-            "hitByTimeLimit", hitByTimeLimit);
+        if (hitByTimeLimit != 0)
+          reporter.incrCounter("FetcherStatus", "hitByTimeLimit",
+              hitByTimeLimit);
       }
-      
+
       // some requests seem to hang, despite all intentions
       if ((System.currentTimeMillis() - lastRequestStart.get()) > timeout) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Aborting with "+activeThreads+" hung threads.");
+          LOG.warn("Aborting with " + activeThreads + " hung threads.");
         }
         return;
       }
 
     } while (activeThreads.get() > 0);
     LOG.info("-activeThreads=" + activeThreads);
-    
+
   }
 
-  public void fetch(Path segment, int threads)
-    throws IOException {
+  public void fetch(Path segment, int threads) throws IOException {
 
     checkConfiguration();
 
@@ -1181,7 +1267,7 @@
       LOG.info("Fetcher Timelimit set for : " + timelimit);
       getConf().setLong("fetcher.timelimit", timelimit);
     }
-        
+
     JobConf job = new NutchJob(getConf());
     job.setJobName("fetch " + segment);
 
@@ -1191,7 +1277,8 @@
     // for politeness, don't permit parallel execution of a single task
     job.setSpeculativeExecution(false);
 
-    FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+    FileInputFormat.addInputPath(job, new Path(segment,
+        CrawlDatum.GENERATE_DIR_NAME));
     job.setInputFormat(InputFormat.class);
 
     job.setMapRunnerClass(Fetcher.class);
@@ -1204,16 +1291,16 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("Fetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Fetcher: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-
   /** Run the fetcher. */
   public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Fetcher(), args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
 
     String usage = "Usage: Fetcher <segment> [-threads n]";
@@ -1222,15 +1309,15 @@
       System.err.println(usage);
       return -1;
     }
-      
+
     Path segment = new Path(args[0]);
 
     int threads = getConf().getInt("fetcher.threads.fetch", 10);
     boolean parsing = false;
 
-    for (int i = 1; i < args.length; i++) {       // parse command line
-      if (args[i].equals("-threads")) {           // found -threads option
-        threads =  Integer.parseInt(args[++i]);
+    for (int i = 1; i < args.length; i++) { // parse command line
+      if (args[i].equals("-threads")) { // found -threads option
+        threads = Integer.parseInt(args[++i]);
       }
     }
 
Index: src/java/org/apache/nutch/fetcher/FetcherOutput.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherOutput.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/FetcherOutput.java	(working copy)
@@ -30,10 +30,10 @@
   private Content content;
   private ParseImpl parse;
 
-  public FetcherOutput() {}
+  public FetcherOutput() {
+  }
 
-  public FetcherOutput(CrawlDatum crawlDatum, Content content,
-                       ParseImpl parse) {
+  public FetcherOutput(CrawlDatum crawlDatum, Content content, ParseImpl parse) {
     this.crawlDatum = crawlDatum;
     this.content = content;
     this.parse = parse;
@@ -59,22 +59,29 @@
     }
   }
 
-  public CrawlDatum getCrawlDatum() { return crawlDatum; }
-  public Content getContent() { return content; }
-  public ParseImpl getParse() { return parse; }
+  public CrawlDatum getCrawlDatum() {
+    return crawlDatum;
+  }
 
+  public Content getContent() {
+    return content;
+  }
+
+  public ParseImpl getParse() {
+    return parse;
+  }
+
   public boolean equals(Object o) {
     if (!(o instanceof FetcherOutput))
       return false;
-    FetcherOutput other = (FetcherOutput)o;
-    return
-      this.crawlDatum.equals(other.crawlDatum) &&
-      this.content.equals(other.content);
+    FetcherOutput other = (FetcherOutput) o;
+    return this.crawlDatum.equals(other.crawlDatum)
+        && this.content.equals(other.content);
   }
 
   public String toString() {
     StringBuffer buffer = new StringBuffer();
-    buffer.append("CrawlDatum: " + crawlDatum+"\n" );
+    buffer.append("CrawlDatum: " + crawlDatum + "\n");
     return buffer.toString();
   }
 
Index: src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java	(working copy)
@@ -48,74 +48,68 @@
   public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
     Path out = FileOutputFormat.getOutputPath(job);
     if ((out == null) && (job.getNumReduceTasks() != 0)) {
-    	throw new InvalidJobConfException(
-    			"Output directory not set in JobConf.");
+      throw new InvalidJobConfException("Output directory not set in JobConf.");
     }
     if (fs == null) {
-    	fs = out.getFileSystem(job);
+      fs = out.getFileSystem(job);
     }
     if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME)))
-    	throw new IOException("Segment already fetched!");
+      throw new IOException("Segment already fetched!");
   }
 
   public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
-                                      final JobConf job,
-                                      final String name,
-                                      final Progressable progress) throws IOException {
+      final JobConf job, final String name, final Progressable progress)
+      throws IOException {
 
     Path out = FileOutputFormat.getOutputPath(job);
-    final Path fetch =
-      new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
-    final Path content =
-      new Path(new Path(out, Content.DIR_NAME), name);
-    
-    final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);
+    final Path fetch = new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
+    final Path content = new Path(new Path(out, Content.DIR_NAME), name);
 
-    final MapFile.Writer fetchOut =
-      new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
-          compType, progress);
-    
+    final CompressionType compType = SequenceFileOutputFormat
+        .getOutputCompressionType(job);
+
+    final MapFile.Writer fetchOut = new MapFile.Writer(job, fs,
+        fetch.toString(), Text.class, CrawlDatum.class, compType, progress);
+
     return new RecordWriter<Text, NutchWritable>() {
-        private MapFile.Writer contentOut;
-        private RecordWriter<Text, Parse> parseOut;
+      private MapFile.Writer contentOut;
+      private RecordWriter<Text, Parse> parseOut;
 
-        {
-          if (Fetcher.isStoringContent(job)) {
-            contentOut = new MapFile.Writer(job, fs, content.toString(),
-                                            Text.class, Content.class,
-                                            compType, progress);
-          }
+      {
+        if (Fetcher.isStoringContent(job)) {
+          contentOut = new MapFile.Writer(job, fs, content.toString(),
+              Text.class, Content.class, compType, progress);
+        }
 
-          if (Fetcher.isParsing(job)) {
-            parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
-          }
+        if (Fetcher.isParsing(job)) {
+          parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name,
+              progress);
         }
+      }
 
-        public void write(Text key, NutchWritable value)
-          throws IOException {
+      public void write(Text key, NutchWritable value) throws IOException {
 
-          Writable w = value.get();
-          
-          if (w instanceof CrawlDatum)
-            fetchOut.append(key, w);
-          else if (w instanceof Content)
-            contentOut.append(key, w);
-          else if (w instanceof Parse)
-            parseOut.write(key, (Parse)w);
-        }
+        Writable w = value.get();
 
-        public void close(Reporter reporter) throws IOException {
-          fetchOut.close();
-          if (contentOut != null) {
-            contentOut.close();
-          }
-          if (parseOut != null) {
-            parseOut.close(reporter);
-          }
+        if (w instanceof CrawlDatum)
+          fetchOut.append(key, w);
+        else if (w instanceof Content)
+          contentOut.append(key, w);
+        else if (w instanceof Parse)
+          parseOut.write(key, (Parse) w);
+      }
+
+      public void close(Reporter reporter) throws IOException {
+        fetchOut.close();
+        if (contentOut != null) {
+          contentOut.close();
         }
+        if (parseOut != null) {
+          parseOut.close(reporter);
+        }
+      }
 
-      };
+    };
 
-  }      
+  }
 }
-
Index: src/java/org/apache/nutch/metadata/Metadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/Metadata.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/Metadata.java	(working copy)
@@ -27,23 +27,21 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-
 /**
  * A multi-valued metadata container.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  */
-public class Metadata implements Writable, CreativeCommons,
-DublinCore, HttpHeaders, Nutch, Office, Feed {
+public class Metadata implements Writable, CreativeCommons, DublinCore,
+    HttpHeaders, Nutch, Office, Feed {
 
   /**
    * A map of all metadata attributes.
    */
   private Map<String, String[]> metadata = null;
 
-
   /**
    * Constructs a new, empty metadata.
    */
@@ -53,9 +51,10 @@
 
   /**
    * Returns true if named value is multivalued.
-   * @param name name of metadata
-   * @return true is named value is multivalued, false if single
-   * value or null
+   * 
+   * @param name
+   *          name of metadata
+   * @return true if named value is multivalued, false if single value or null
    */
   public boolean isMultiValued(final String name) {
     return metadata.get(name) != null && metadata.get(name).length > 1;
@@ -63,6 +62,7 @@
 
   /**
    * Returns an array of the names contained in the metadata.
+   * 
    * @return Metadata names
    */
   public String[] names() {
@@ -70,11 +70,11 @@
   }
 
   /**
-   * Get the value associated to a metadata name.
-   * If many values are assiociated to the specified name, then the first
-   * one is returned.
-   *
-   * @param name of the metadata.
+   * Get the value associated to a metadata name. If many values are associated
+   * to the specified name, then the first one is returned.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the value associated to the specified metadata name.
    */
   public String get(final String name) {
@@ -88,13 +88,15 @@
 
   /**
    * Get the values associated to a metadata name.
-   * @param name of the metadata.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the values associated to a metadata name.
    */
   public String[] getValues(final String name) {
     return _getValues(name);
   }
-  
+
   private String[] _getValues(final String name) {
     String[] values = metadata.get(name);
     if (values == null) {
@@ -104,12 +106,13 @@
   }
 
   /**
-   * Add a metadata name/value mapping.
-   * Add the specified value to the list of values associated to the
-   * specified metadata name.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Add a metadata name/value mapping. Add the specified value to the list of
+   * values associated to the specified metadata name.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void add(final String name, final String value) {
     String[] values = metadata.get(name);
@@ -125,31 +128,37 @@
 
   /**
    * Copy All key-value pairs from properties.
-   * @param properties properties to copy from
+   * 
+   * @param properties
+   *          properties to copy from
    */
   public void setAll(Properties properties) {
     Enumeration names = properties.propertyNames();
     while (names.hasMoreElements()) {
       String name = (String) names.nextElement();
-      metadata.put(name, new String[]{properties.getProperty(name)});
+      metadata.put(name, new String[] { properties.getProperty(name) });
     }
   }
 
   /**
-   * Set metadata name/value.
-   * Associate the specified value to the specified metadata name. If some
-   * previous values were associated to this name, they are removed.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Set metadata name/value. Associate the specified value to the specified
+   * metadata name. If some previous values were associated to this name, they
+   * are removed.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void set(String name, String value) {
-    metadata.put(name, new String[]{value});
+    metadata.put(name, new String[] { value });
   }
 
   /**
    * Remove a metadata and all its associated values.
-   * @param name metadata name to remove
+   * 
+   * @param name
+   *          metadata name to remove
    */
   public void remove(String name) {
     metadata.remove(name);
@@ -157,12 +166,13 @@
 
   /**
    * Returns the number of metadata names in this metadata.
+   * 
    * @return number of metadata names
    */
   public int size() {
     return metadata.size();
   }
-  
+
   /** Remove all mappings from metadata. */
   public void clear() {
     metadata.clear();
@@ -170,7 +180,9 @@
 
   public boolean equals(Object o) {
 
-    if (o == null) { return false; }
+    if (o == null) {
+      return false;
+    }
 
     Metadata other = null;
     try {
@@ -179,7 +191,9 @@
       return false;
     }
 
-    if (other.size() != size()) { return false; }
+    if (other.size() != size()) {
+      return false;
+    }
 
     String[] names = names();
     for (int i = 0; i < names.length; i++) {
@@ -203,10 +217,7 @@
     for (int i = 0; i < names.length; i++) {
       String[] values = _getValues(names[i]);
       for (int j = 0; j < values.length; j++) {
-        buf.append(names[i])
-           .append("=")
-           .append(values[j])
-           .append(" ");
+        buf.append(names[i]).append("=").append(values[j]).append(" ");
       }
     }
     return buf.toString();
Index: src/java/org/apache/nutch/metadata/Nutch.java
===================================================================
--- src/java/org/apache/nutch/metadata/Nutch.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/Nutch.java	(working copy)
@@ -18,21 +18,18 @@
 
 import org.apache.hadoop.io.Text;
 
-
 /**
  * A collection of Nutch internal metadata constants.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Nutch {
-  
-  public static final String ORIGINAL_CHAR_ENCODING =
-          "OriginalCharEncoding";
-  
-  public static final String CHAR_ENCODING_FOR_CONVERSION =
-          "CharEncodingForConversion";
 
+  public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding";
+
+  public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion";
+
   public static final String SIGNATURE_KEY = "nutch.content.digest";
 
   public static final String SEGMENT_NAME_KEY = "nutch.segment.name";
@@ -41,17 +38,22 @@
 
   public static final String GENERATE_TIME_KEY = "_ngt_";
 
-  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(GENERATE_TIME_KEY);
+  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(
+      GENERATE_TIME_KEY);
 
   public static final String PROTO_STATUS_KEY = "_pst_";
 
-  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(PROTO_STATUS_KEY);
-  
+  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(
+      PROTO_STATUS_KEY);
+
   public static final String FETCH_TIME_KEY = "_ftk_";
-  
+
   public static final String FETCH_STATUS_KEY = "_fst_";
 
-  /** Sites may request that search engines don't provide access to cached documents. */
+  /**
+   * Sites may request that search engines don't provide access to cached
+   * documents.
+   */
   public static final String CACHING_FORBIDDEN_KEY = "caching.forbidden";
 
   /** Show both original forbidden content and summaries (default). */
Index: src/java/org/apache/nutch/metadata/DublinCore.java
===================================================================
--- src/java/org/apache/nutch/metadata/DublinCore.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/DublinCore.java	(working copy)
@@ -16,149 +16,146 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Dublin Core metadata names.
- *
- * @see <a href="http://dublincore.org">dublincore.org</a> 
- *
+ * 
+ * @see <a href="http://dublincore.org">dublincore.org</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface DublinCore {
-  
-    
+
   /**
-   * Typically, Format may include the media-type or dimensions of the
-   * resource. Format may be used to determine the software, hardware or other
-   * equipment needed to display or operate the resource. Examples of
-   * dimensions include size and duration. Recommended best practice is to
-   * select a value from a controlled vocabulary (for example, the list of
-   * Internet Media Types [MIME] defining computer media formats).
+   * Typically, Format may include the media-type or dimensions of the resource.
+   * Format may be used to determine the software, hardware or other equipment
+   * needed to display or operate the resource. Examples of dimensions include
+   * size and duration. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the list of Internet Media Types [MIME]
+   * defining computer media formats).
    */
   public static final String FORMAT = "format";
-  
+
   /**
-   * Recommended best practice is to identify the resource by means of a
-   * string or number conforming to a formal identification system. Example
-   * formal identification systems include the Uniform Resource Identifier
-   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Recommended best practice is to identify the resource by means of a string
+   * or number conforming to a formal identification system. Example formal
+   * identification systems include the Uniform Resource Identifier (URI)
+   * (including the Uniform Resource Locator (URL)), the Digital Object
    * Identifier (DOI) and the International Standard Book Number (ISBN).
    */
   public static final String IDENTIFIER = "identifier";
-  
+
   /**
    * Date on which the resource was changed.
    */
   public static final String MODIFIED = "modified";
-  
+
   /**
    * An entity responsible for making contributions to the content of the
-   * resource. Examples of a Contributor include a person, an organisation, or
-   * a service. Typically, the name of a Contributor should be used to
-   * indicate the entity.
+   * resource. Examples of a Contributor include a person, an organisation, or a
+   * service. Typically, the name of a Contributor should be used to indicate
+   * the entity.
    */
   public static final String CONTRIBUTOR = "contributor";
-  
+
   /**
-   * The extent or scope of the content of the resource. Coverage will
-   * typically include spatial location (a place name or geographic
-   * coordinates), temporal period (a period label, date, or date range) or
-   * jurisdiction (such as a named administrative entity). Recommended best
-   * practice is to select a value from a controlled vocabulary (for example,
-   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
-   * named places or time periods be used in preference to numeric identifiers
-   * such as sets of coordinates or date ranges.
+   * The extent or scope of the content of the resource. Coverage will typically
+   * include spatial location (a place name or geographic coordinates), temporal
+   * period (a period label, date, or date range) or jurisdiction (such as a
+   * named administrative entity). Recommended best practice is to select a
+   * value from a controlled vocabulary (for example, the Thesaurus of
+   * Geographic Names [TGN]) and that, where appropriate, named places or time
+   * periods be used in preference to numeric identifiers such as sets of
+   * coordinates or date ranges.
    */
   public static final String COVERAGE = "coverage";
-  
+
   /**
    * An entity primarily responsible for making the content of the resource.
    * Examples of a Creator include a person, an organisation, or a service.
    * Typically, the name of a Creator should be used to indicate the entity.
    */
   public static final String CREATOR = "creator";
-  
+
   /**
    * A date associated with an event in the life cycle of the resource.
-   * Typically, Date will be associated with the creation or availability of
-   * the resource. Recommended best practice for encoding the date value is
-   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
-   * format.
+   * Typically, Date will be associated with the creation or availability of the
+   * resource. Recommended best practice for encoding the date value is defined
+   * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format.
    */
   public static final String DATE = "date";
-  
+
   /**
    * An account of the content of the resource. Description may include but is
    * not limited to: an abstract, table of contents, reference to a graphical
    * representation of content or a free-text account of the content.
    */
   public static final String DESCRIPTION = "description";
-  
+
   /**
    * A language of the intellectual content of the resource. Recommended best
    * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
-   * [ISO639], defines two- and three-letter primary language tags with
-   * optional subtags. Examples include "en" or "eng" for English, "akk" for
-   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   * [ISO639], defines two- and three-letter primary language tags with optional
+   * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian,
+   * and "en-GB" for English used in the United Kingdom.
    */
   public static final String LANGUAGE = "language";
-  
+
   /**
    * An entity responsible for making the resource available. Examples of a
    * Publisher include a person, an organisation, or a service. Typically, the
    * name of a Publisher should be used to indicate the entity.
    */
   public static final String PUBLISHER = "publisher";
-  
+
   /**
    * A reference to a related resource. Recommended best practice is to
    * reference the resource by means of a string or number conforming to a
    * formal identification system.
    */
   public static final String RELATION = "relation";
-  
+
   /**
-   * Information about rights held in and over the resource. Typically, a
-   * Rights element will contain a rights management statement for the
-   * resource, or reference a service providing such information. Rights
-   * information often encompasses Intellectual Property Rights (IPR),
-   * Copyright, and various Property Rights. If the Rights element is absent,
-   * no assumptions can be made about the status of these and other rights
-   * with respect to the resource.
+   * Information about rights held in and over the resource. Typically, a Rights
+   * element will contain a rights management statement for the resource, or
+   * reference a service providing such information. Rights information often
+   * encompasses Intellectual Property Rights (IPR), Copyright, and various
+   * Property Rights. If the Rights element is absent, no assumptions can be
+   * made about the status of these and other rights with respect to the
+   * resource.
    */
   public static final String RIGHTS = "rights";
-  
+
   /**
    * A reference to a resource from which the present resource is derived. The
    * present resource may be derived from the Source resource in whole or in
-   * part. Recommended best practice is to reference the resource by means of
-   * a string or number conforming to a formal identification system.
+   * part. Recommended best practice is to reference the resource by means of a
+   * string or number conforming to a formal identification system.
    */
   public static final String SOURCE = "source";
-  
+
   /**
    * The topic of the content of the resource. Typically, a Subject will be
-   * expressed as keywords, key phrases or classification codes that describe
-   * a topic of the resource. Recommended best practice is to select a value
-   * from a controlled vocabulary or formal classification scheme.
+   * expressed as keywords, key phrases or classification codes that describe a
+   * topic of the resource. Recommended best practice is to select a value from
+   * a controlled vocabulary or formal classification scheme.
    */
   public static final String SUBJECT = "subject";
-  
+
   /**
    * A name given to the resource. Typically, a Title will be a name by which
    * the resource is formally known.
    */
   public static final String TITLE = "title";
-  
+
   /**
    * The nature or genre of the content of the resource. Type includes terms
-   * describing general categories, functions, genres, or aggregation levels
-   * for content. Recommended best practice is to select a value from a
-   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
-   * To describe the physical or digital manifestation of the resource, use
-   * the Format element.
+   * describing general categories, functions, genres, or aggregation levels for
+   * content. Recommended best practice is to select a value from a controlled
+   * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe
+   * the physical or digital manifestation of the resource, use the Format
+   * element.
    */
   public static final String TYPE = "type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/MetaWrapper.java
===================================================================
--- src/java/org/apache/nutch/metadata/MetaWrapper.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/MetaWrapper.java	(working copy)
@@ -28,28 +28,29 @@
 /**
  * This is a simple decorator that adds metadata to any Writable-s that can be
  * serialized by <tt>NutchWritable</tt>. This is useful when data needs to be
- * temporarily enriched during processing, but this
- * temporary metadata doesn't need to be permanently stored after the job is done.
+ * temporarily enriched during processing, but this temporary metadata doesn't
+ * need to be permanently stored after the job is done.
  * 
  * @author Andrzej Bialecki
  */
 public class MetaWrapper extends NutchWritable {
   private Metadata metadata;
-  
+
   public MetaWrapper() {
     super();
     metadata = new Metadata();
   }
-  
+
   public MetaWrapper(Writable instance, Configuration conf) {
     super(instance);
     metadata = new Metadata();
     setConf(conf);
   }
-  
+
   public MetaWrapper(Metadata metadata, Writable instance, Configuration conf) {
     super(instance);
-    if (metadata == null) metadata = new Metadata();
+    if (metadata == null)
+      metadata = new Metadata();
     this.metadata = metadata;
     setConf(conf);
   }
@@ -60,43 +61,52 @@
   public Metadata getMetadata() {
     return metadata;
   }
-  
+
   /**
-   * Add metadata. See {@link Metadata#add(String, String)} for more information.
-   * @param name metadata name
-   * @param value metadata value
+   * Add metadata. See {@link Metadata#add(String, String)} for more
+   * information.
+   * 
+   * @param name
+   *          metadata name
+   * @param value
+   *          metadata value
    */
   public void addMeta(String name, String value) {
     metadata.add(name, value);
   }
-  
+
   /**
-   * Set metadata. See {@link Metadata#set(String, String)} for more information.
+   * Set metadata. See {@link Metadata#set(String, String)} for more
+   * information.
+   * 
    * @param name
    * @param value
    */
   public void setMeta(String name, String value) {
     metadata.set(name, value);
   }
-  
+
   /**
    * Get metadata. See {@link Metadata#get(String)} for more information.
+   * 
    * @param name
    * @return metadata value
    */
   public String getMeta(String name) {
     return metadata.get(name);
   }
-  
+
   /**
-   * Get multiple metadata. See {@link Metadata#getValues(String)} for more information.
+   * Get multiple metadata. See {@link Metadata#getValues(String)} for more
+   * information.
+   * 
    * @param name
    * @return multiple values
    */
   public String[] getMetaValues(String name) {
     return metadata.getValues(name);
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
     metadata = new Metadata();
Index: src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(working copy)
@@ -33,7 +33,7 @@
 
   /**
    * Treshold divider.
-   *
+   * 
    * <code>threshold = searched.length() / TRESHOLD_DIVIDER;</code>
    */
   private static final int TRESHOLD_DIVIDER = 3;
@@ -52,7 +52,7 @@
 
     // Uses following array to fill the metanames index and the
     // metanames list.
-    Class[] spellthese = {HttpHeaders.class};
+    Class[] spellthese = { HttpHeaders.class };
 
     for (Class spellCheckedNames : spellthese) {
       for (Field field : spellCheckedNames.getFields()) {
@@ -73,7 +73,7 @@
 
   /**
    * Normalizes String.
-   *
+   * 
    * @param str
    *          the string to normalize
    * @return normalized String
@@ -102,7 +102,7 @@
    * </ul>
    * If no matching with a well-known metadata name is found, then the original
    * name is returned.
-   *
+   * 
    * @param name
    *          Name to normalize
    * @return normalized name
Index: src/java/org/apache/nutch/metadata/HttpHeaders.java
===================================================================
--- src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/HttpHeaders.java	(working copy)
@@ -16,34 +16,33 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of HTTP header names.
- *
- * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
- *      Protocol -- HTTP/1.1 (RFC 2616)</a>
- *
+ * 
+ * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer Protocol
+ *      -- HTTP/1.1 (RFC 2616)</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface HttpHeaders {
 
   public final static String CONTENT_ENCODING = "Content-Encoding";
-  
+
   public final static String CONTENT_LANGUAGE = "Content-Language";
 
   public final static String CONTENT_LENGTH = "Content-Length";
-  
+
   public final static String CONTENT_LOCATION = "Content-Location";
-  
+
   public static final String CONTENT_DISPOSITION = "Content-Disposition";
 
   public final static String CONTENT_MD5 = "Content-MD5";
-  
+
   public final static String CONTENT_TYPE = "Content-Type";
-  
+
   public final static String LAST_MODIFIED = "Last-Modified";
-  
+
   public final static String LOCATION = "Location";
 
 }
Index: src/java/org/apache/nutch/metadata/Office.java
===================================================================
--- src/java/org/apache/nutch/metadata/Office.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/Office.java	(working copy)
@@ -16,37 +16,36 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of <i>"Office"</i> documents properties names.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Office {
-    
+
   public static final String KEYWORDS = "Keywords";
-  
+
   public static final String COMMENTS = "Comments";
-  
+
   public static final String LAST_AUTHOR = "Last-Author";
-  
+
   public static final String APPLICATION_NAME = "Application-Name";
-  
+
   public static final String CHARACTER_COUNT = "Character Count";
-  
+
   public static final String LAST_PRINTED = "Last-Printed";
-  
+
   public static final String LAST_SAVED = "Last-Save-Date";
-  
+
   public static final String PAGE_COUNT = "Page-Count";
-  
+
   public static final String REVISION_NUMBER = "Revision-Number";
-  
+
   public static final String WORD_COUNT = "Word-Count";
-  
+
   public static final String TEMPLATE = "Template";
-  
+
   public static final String AUTHOR = "Author";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/CreativeCommons.java
===================================================================
--- src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/CreativeCommons.java	(working copy)
@@ -16,21 +16,20 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Creative Commons properties names.
- *
+ * 
  * @see <a href="http://www.creativecommons.org/">creativecommons.org</a>
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface CreativeCommons {
-  
+
   public final static String LICENSE_URL = "License-Url";
-  
+
   public final static String LICENSE_LOCATION = "License-Location";
-  
+
   public final static String WORK_TYPE = "Work-Type";
-  
+
 }
Index: src/java/org/apache/nutch/tools/proxy/SegmentHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/SegmentHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/SegmentHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -52,42 +53,54 @@
  * XXX should turn this into a plugin?
  */
 public class SegmentHandler extends AbstractTestbedHandler {
-  private static final Logger LOG = LoggerFactory.getLogger(SegmentHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SegmentHandler.class);
   private Segment seg;
-  
-  private static HashMap<Integer,Integer> protoCodes = new HashMap<Integer,Integer>();
-  
+
+  private static HashMap<Integer, Integer> protoCodes = new HashMap<Integer, Integer>();
+
   static {
-    protoCodes.put(ProtocolStatus.ACCESS_DENIED, HttpServletResponse.SC_UNAUTHORIZED);
-    protoCodes.put(ProtocolStatus.BLOCKED, HttpServletResponse.SC_SERVICE_UNAVAILABLE);
-    protoCodes.put(ProtocolStatus.EXCEPTION, HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+    protoCodes.put(ProtocolStatus.ACCESS_DENIED,
+        HttpServletResponse.SC_UNAUTHORIZED);
+    protoCodes.put(ProtocolStatus.BLOCKED,
+        HttpServletResponse.SC_SERVICE_UNAVAILABLE);
+    protoCodes.put(ProtocolStatus.EXCEPTION,
+        HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
     protoCodes.put(ProtocolStatus.FAILED, HttpServletResponse.SC_BAD_REQUEST);
     protoCodes.put(ProtocolStatus.GONE, HttpServletResponse.SC_GONE);
-    protoCodes.put(ProtocolStatus.MOVED, HttpServletResponse.SC_MOVED_PERMANENTLY);
-    protoCodes.put(ProtocolStatus.NOTFETCHING, HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.MOVED,
+        HttpServletResponse.SC_MOVED_PERMANENTLY);
+    protoCodes.put(ProtocolStatus.NOTFETCHING,
+        HttpServletResponse.SC_BAD_REQUEST);
     protoCodes.put(ProtocolStatus.NOTFOUND, HttpServletResponse.SC_NOT_FOUND);
-    protoCodes.put(ProtocolStatus.NOTMODIFIED, HttpServletResponse.SC_NOT_MODIFIED);
-    protoCodes.put(ProtocolStatus.PROTO_NOT_FOUND, HttpServletResponse.SC_BAD_REQUEST);
-    protoCodes.put(ProtocolStatus.REDIR_EXCEEDED, HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.NOTMODIFIED,
+        HttpServletResponse.SC_NOT_MODIFIED);
+    protoCodes.put(ProtocolStatus.PROTO_NOT_FOUND,
+        HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.REDIR_EXCEEDED,
+        HttpServletResponse.SC_BAD_REQUEST);
     protoCodes.put(ProtocolStatus.RETRY, HttpServletResponse.SC_BAD_REQUEST);
-    protoCodes.put(ProtocolStatus.ROBOTS_DENIED, HttpServletResponse.SC_FORBIDDEN);
+    protoCodes.put(ProtocolStatus.ROBOTS_DENIED,
+        HttpServletResponse.SC_FORBIDDEN);
     protoCodes.put(ProtocolStatus.SUCCESS, HttpServletResponse.SC_OK);
-    protoCodes.put(ProtocolStatus.TEMP_MOVED, HttpServletResponse.SC_MOVED_TEMPORARILY);
-    protoCodes.put(ProtocolStatus.WOULDBLOCK, HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.TEMP_MOVED,
+        HttpServletResponse.SC_MOVED_TEMPORARILY);
+    protoCodes.put(ProtocolStatus.WOULDBLOCK,
+        HttpServletResponse.SC_BAD_REQUEST);
   }
-  
+
   private static class SegmentPathFilter implements PathFilter {
     public static final SegmentPathFilter INSTANCE = new SegmentPathFilter();
-    
+
     @Override
     public boolean accept(Path p) {
       return p.getName().startsWith("part-");
     }
-    
+
   }
-  
+
   private static class Segment implements Closeable {
-    
+
     private static final Partitioner PARTITIONER = new HashPartitioner();
 
     private FileSystem fs;
@@ -101,7 +114,8 @@
     private MapFile.Reader[] crawl;
     private Configuration conf;
 
-    public Segment(FileSystem fs, Path segmentDir, Configuration conf) throws IOException {
+    public Segment(FileSystem fs, Path segmentDir, Configuration conf)
+        throws IOException {
       this.fs = fs;
       this.segmentDir = segmentDir;
       this.conf = conf;
@@ -112,43 +126,52 @@
         if (crawl == null)
           crawl = getReaders(CrawlDatum.FETCH_DIR_NAME);
       }
-      return (CrawlDatum)getEntry(crawl, url, new CrawlDatum());
+      return (CrawlDatum) getEntry(crawl, url, new CrawlDatum());
     }
-    
+
     public Content getContent(Text url) throws IOException {
       synchronized (cLock) {
         if (content == null)
           content = getReaders(Content.DIR_NAME);
       }
-      return (Content)getEntry(content, url, new Content());
+      return (Content) getEntry(content, url, new Content());
     }
 
     /** Open the output generated by this format. */
     private MapFile.Reader[] getReaders(String subDir) throws IOException {
       Path dir = new Path(segmentDir, subDir);
       FileSystem fs = dir.getFileSystem(conf);
-      Path[] names = FileUtil.stat2Paths(fs.listStatus(dir, SegmentPathFilter.INSTANCE));
+      Path[] names = FileUtil.stat2Paths(fs.listStatus(dir,
+          SegmentPathFilter.INSTANCE));
 
       // sort names, so that hash partitioning works
       Arrays.sort(names);
-      
+
       MapFile.Reader[] parts = new MapFile.Reader[names.length];
       for (int i = 0; i < names.length; i++) {
         parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
       }
       return parts;
     }
-    
-    private Writable getEntry(MapFile.Reader[] readers, Text url,
-                              Writable entry) throws IOException {
+
+    private Writable getEntry(MapFile.Reader[] readers, Text url, Writable entry)
+        throws IOException {
       return MapFileOutputFormat.getEntry(readers, PARTITIONER, url, entry);
     }
 
     public void close() throws IOException {
-      if (content != null) { closeReaders(content); }
-      if (parseText != null) { closeReaders(parseText); }
-      if (parseData != null) { closeReaders(parseData); }
-      if (crawl != null) { closeReaders(crawl); }
+      if (content != null) {
+        closeReaders(content);
+      }
+      if (parseText != null) {
+        closeReaders(parseText);
+      }
+      if (parseData != null) {
+        closeReaders(parseData);
+      }
+      if (crawl != null) {
+        closeReaders(crawl);
+      }
     }
 
     private void closeReaders(MapFile.Reader[] readers) throws IOException {
@@ -158,14 +181,14 @@
     }
 
   }
-  
+
   public SegmentHandler(Configuration conf, Path name) throws Exception {
     seg = new Segment(FileSystem.get(conf), name, conf);
   }
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       String uri = req.getUri().toString();
       LOG.info("URI: " + uri);
@@ -175,17 +198,18 @@
       if (cd != null) {
         addMyHeader(res, "Res", "found");
         LOG.info("-got " + cd.toString());
-        ProtocolStatus ps = (ProtocolStatus)cd.getMetaData().get(Nutch.WRITABLE_PROTO_STATUS_KEY);
+        ProtocolStatus ps = (ProtocolStatus) cd.getMetaData().get(
+            Nutch.WRITABLE_PROTO_STATUS_KEY);
         if (ps != null) {
           Integer TrCode = protoCodes.get(ps.getCode());
           if (TrCode != null) {
-            res.setStatus(TrCode.intValue());            
+            res.setStatus(TrCode.intValue());
           } else {
             res.setStatus(HttpServletResponse.SC_OK);
           }
           addMyHeader(res, "ProtocolStatus", ps.toString());
         } else {
-          res.setStatus(HttpServletResponse.SC_OK);          
+          res.setStatus(HttpServletResponse.SC_OK);
         }
         Content c = seg.getContent(url);
         if (c == null) { // missing content
Index: src/java/org/apache/nutch/tools/proxy/FakeHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -29,22 +30,20 @@
 public class FakeHandler extends AbstractTestbedHandler {
   Random r = new Random(1234567890L); // predictable
 
-  private static final String testA = 
-    "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n" + 
-    "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>" +
-    "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>" +
-    "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
-  private static final String testB =
-    "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>" +
-    "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>" +
-    "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
+  private static final String testA = "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n"
+      + "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>"
+      + "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>"
+      + "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
+  private static final String testB = "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>"
+      + "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>"
+      + "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
 
   @Override
-  public void handle(Request req, HttpServletResponse res, String target, 
-          int dispatch) throws IOException, ServletException {
+  public void handle(Request req, HttpServletResponse res, String target,
+      int dispatch) throws IOException, ServletException {
     HttpURI u = req.getUri();
     String uri = u.toString();
-    //System.err.println("-faking " + uri.toString());
+    // System.err.println("-faking " + uri.toString());
     addMyHeader(res, "URI", uri);
     // don't pass it down the chain
     req.setHandled(true);
@@ -68,8 +67,10 @@
         base = u.getPath();
       }
       String prefix = u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) base += ":" + u.getPort();
-      if (!base.startsWith("/")) prefix += "/";
+      if (u.getPort() != 80 && u.getPort() != -1)
+        base += ":" + u.getPort();
+      if (!base.startsWith("/"))
+        prefix += "/";
       prefix = prefix + base;
       for (int i = 0; i < 10; i++) {
         String link = "<p><a href='" + prefix;
@@ -82,18 +83,20 @@
       // fake a few links to random nonexistent hosts
       for (int i = 0; i < 5; i++) {
         int h = r.nextInt(1000000); // 1 mln hosts
-        String link = "<p><a href='http://www.fake-" + h + ".com/'>fake host " + h + "</a></p>\r\n";
+        String link = "<p><a href='http://www.fake-" + h + ".com/'>fake host "
+            + h + "</a></p>\r\n";
         os.write(link.getBytes());
       }
       // fake a link to the root URL
       String link = "<p><a href='" + u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) link += ":" + u.getPort();
+      if (u.getPort() != 80 && u.getPort() != -1)
+        link += ":" + u.getPort();
       link += "/'>site " + u.getHost() + "</a></p>\r\n";
       os.write(link.getBytes());
       os.write(testB.getBytes());
       res.flushBuffer();
     } catch (IOException ioe) {
-    }    
+    }
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -31,29 +32,33 @@
 import org.mortbay.jetty.Request;
 
 public class LogDebugHandler extends AbstractTestbedHandler implements Filter {
-  private static final Logger LOG = LoggerFactory.getLogger(LogDebugHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(LogDebugHandler.class);
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
-    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n" + req.getConnection().getRequestFields());
+      int dispatch) throws IOException, ServletException {
+    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n"
+        + req.getConnection().getRequestFields());
   }
 
   @Override
   public void doFilter(ServletRequest req, ServletResponse res,
-          FilterChain chain) throws IOException, ServletException {
-    ((HttpServletResponse)res).addHeader("X-Handled-By", "AsyncProxyHandler");
-    ((HttpServletResponse)res).addHeader("X-TestbedHandlers", "AsyncProxyHandler");
+      FilterChain chain) throws IOException, ServletException {
+    ((HttpServletResponse) res).addHeader("X-Handled-By", "AsyncProxyHandler");
+    ((HttpServletResponse) res).addHeader("X-TestbedHandlers",
+        "AsyncProxyHandler");
     try {
       chain.doFilter(req, res);
     } catch (Throwable e) {
-      ((HttpServletResponse)res).sendError(HttpServletResponse.SC_BAD_REQUEST, e.toString());
+      ((HttpServletResponse) res).sendError(HttpServletResponse.SC_BAD_REQUEST,
+          e.toString());
     }
   }
 
   @Override
   public void init(FilterConfig arg0) throws ServletException {
     // TODO Auto-generated method stub
-    
+
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -27,13 +28,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     // don't pass it down the chain
     req.setHandled(true);
     res.addHeader("X-Handled-By", getClass().getSimpleName());
     addMyHeader(res, "URI", req.getUri().toString());
-    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: " +
-            req.getUri().toString());
+    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: "
+        + req.getUri().toString());
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -30,16 +31,17 @@
 
   @Override
   public void handle(String target, HttpServletRequest req,
-          HttpServletResponse res, int dispatch) throws IOException,
-          ServletException {
-    Request base_request = (req instanceof Request) ? (Request)req : HttpConnection.getCurrentConnection().getRequest();
+      HttpServletResponse res, int dispatch) throws IOException,
+      ServletException {
+    Request base_request = (req instanceof Request) ? (Request) req
+        : HttpConnection.getCurrentConnection().getRequest();
     res.addHeader("X-TestbedHandlers", this.getClass().getSimpleName());
     handle(base_request, res, target, dispatch);
   }
-  
-  public abstract void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException;
-  
+
+  public abstract void handle(Request req, HttpServletResponse res,
+      String target, int dispatch) throws IOException, ServletException;
+
   public void addMyHeader(HttpServletResponse res, String name, String value) {
     name = "X-" + this.getClass().getSimpleName() + "-" + name;
     res.addHeader(name, value);
Index: src/java/org/apache/nutch/tools/proxy/DelayHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -25,13 +26,13 @@
 import org.mortbay.jetty.Request;
 
 public class DelayHandler extends AbstractTestbedHandler {
-  
+
   public static final long DEFAULT_DELAY = 2000;
-  
+
   private int delay;
   private boolean random;
   private Random r;
-  
+
   public DelayHandler(int delay) {
     if (delay < 0) {
       delay = -delay;
@@ -43,13 +44,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       int del = random ? r.nextInt(delay) : delay;
       Thread.sleep(del);
       addMyHeader(res, "Delay", String.valueOf(del));
     } catch (Exception e) {
-      
+
     }
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/TestbedProxy.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -44,17 +45,25 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
-      System.err.println("TestbedProxy [-seg <segment_name> | -segdir <segments>] [-port <nnn>] [-forward] [-fake] [-delay nnn] [-debug]");
-      System.err.println("-seg <segment_name>\tpath to a single segment (can be specified multiple times)");
-      System.err.println("-segdir <segments>\tpath to a parent directory of multiple segments (as above)");
-      System.err.println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
-      System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
-      System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
-      System.err.println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
-      System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
+      System.err
+          .println("TestbedProxy [-seg <segment_name> | -segdir <segments>] [-port <nnn>] [-forward] [-fake] [-delay nnn] [-debug]");
+      System.err
+          .println("-seg <segment_name>\tpath to a single segment (can be specified multiple times)");
+      System.err
+          .println("-segdir <segments>\tpath to a parent directory of multiple segments (as above)");
+      System.err
+          .println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
+      System.err
+          .println("-forward\tif specified, requests to all unknown urls will be passed to");
+      System.err
+          .println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
+      System.err
+          .println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
+      System.err
+          .println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
       System.exit(-1);
     }
-    
+
     Configuration conf = NutchConfiguration.create();
     int port = conf.getInt("segment.proxy.port", 8181);
     boolean forward = false;
@@ -62,7 +71,7 @@
     boolean delay = false;
     boolean debug = false;
     int delayVal = 0;
-    
+
     HashSet<Path> segs = new HashSet<Path>();
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-segdir")) {
@@ -88,28 +97,30 @@
         System.exit(-1);
       }
     }
-    
+
     // Create the server
     Server server = new Server();
     SocketConnector connector = new SocketConnector();
     connector.setPort(port);
     connector.setResolveNames(false);
     server.addConnector(connector);
-    
+
     // create a list of handlers
     HandlerList list = new HandlerList();
     server.addHandler(list);
-    
+
     if (debug) {
       LOG.info("* Added debug handler.");
       list.addHandler(new LogDebugHandler());
     }
- 
+
     if (delay) {
-      LOG.info("* Added delay handler: " + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
+      LOG.info("* Added delay handler: "
+          + (delayVal < 0 ? "random delay up to " + (-delayVal)
+              : "constant delay of " + delayVal));
       list.addHandler(new DelayHandler(delayVal));
     }
-    
+
     // XXX alternatively, we can add the DispatchHandler as the first one,
     // XXX to activate handler plugins and redirect requests to appropriate
     // XXX handlers ... Here we always load these handlers
@@ -122,7 +133,8 @@
         list.addHandler(segment);
         LOG.info("* Added segment handler for: " + p);
       } catch (Exception e) {
-        LOG.warn("Skipping segment '" + p + "': " + StringUtils.stringifyException(e));
+        LOG.warn("Skipping segment '" + p + "': "
+            + StringUtils.stringifyException(e));
       }
     }
     if (forward) {
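For reference, the flags documented in the usage message above compose as in the following invocation (segment directory, port and delay value are illustrative placeholders): serve all segments found under crawl/segments on port 8181, delay each response by a random interval of up to 5 seconds, and forward requests for unknown URLs to their original servers:

  TestbedProxy -segdir crawl/segments -port 8181 -delay -5 -forward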
Index: src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(working copy)
@@ -34,23 +34,29 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * <p>The <code>ArchRecordReader</code> class provides a record reader which 
- * reads records from arc files.</p>
+ * <p>
+ * The <code>ArcRecordReader</code> class provides a record reader which reads
+ * records from arc files.
+ * </p>
  * 
- * <p>Arc files are essentially tars of gzips.  Each record in an arc file is
- * a compressed gzip.  Multiple records are concatenated together to form a
- * complete arc.  For more information on the arc file format see
- * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.</p>
+ * <p>
+ * Arc files are essentially tars of gzips. Each record in an arc file is a
+ * compressed gzip. Multiple records are concatenated together to form a
+ * complete arc. For more information on the arc file format see
+ * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.
+ * </p>
  * 
- * <p>Arc files are used by the internet archive and grub projects.</p>
+ * <p>
+ * Arc files are used by the internet archive and grub projects.
+ * </p>
  * 
  * @see http://www.archive.org/
  * @see http://www.grub.org/
  */
-public class ArcRecordReader
-  implements RecordReader<Text, BytesWritable> {
+public class ArcRecordReader implements RecordReader<Text, BytesWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcRecordReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcRecordReader.class);
 
   protected Configuration conf;
   protected long splitStart = 0;
@@ -60,30 +66,32 @@
   protected long fileLen = 0;
   protected FSDataInputStream in;
 
-  private static byte[] MAGIC = {(byte)0x1F, (byte)0x8B};
+  private static byte[] MAGIC = { (byte) 0x1F, (byte) 0x8B };
 
   /**
-   * <p>Returns true if the byte array passed matches the gzip header magic 
-   * number.</p>
+   * <p>
+   * Returns true if the byte array passed matches the gzip header magic number.
+   * </p>
    * 
-   * @param input The byte array to check.
+   * @param input
+   *          The byte array to check.
    * 
    * @return True if the byte array matches the gzip header magic number.
    */
   public static boolean isMagic(byte[] input) {
 
-	// check for null and incorrect length
+    // check for null and incorrect length
     if (input == null || input.length != MAGIC.length) {
       return false;
     }
-    
+
     // check byte by byte
     for (int i = 0; i < MAGIC.length; i++) {
       if (MAGIC[i] != input[i]) {
         return false;
       }
     }
-    
+
     // must match
     return true;
   }
@@ -91,13 +99,16 @@
   /**
    * Constructor that sets the configuration and file split.
    * 
-   * @param conf The job configuration.
-   * @param split The file split to read from.
+   * @param conf
+   *          The job configuration.
+   * @param split
+   *          The file split to read from.
    * 
-   * @throws IOException  If an IO error occurs while initializing file split.
+   * @throws IOException
+   *           If an IO error occurs while initializing file split.
    */
   public ArcRecordReader(Configuration conf, FileSplit split)
-    throws IOException {
+      throws IOException {
 
     Path path = split.getPath();
     FileSystem fs = path.getFileSystem(conf);
@@ -113,8 +124,7 @@
   /**
    * Closes the record reader resources.
    */
-  public void close()
-    throws IOException {
+  public void close() throws IOException {
     this.in.close();
   }
 
@@ -122,14 +132,15 @@
    * Creates a new instance of the <code>Text</code> object for the key.
    */
   public Text createKey() {
-    return (Text)ReflectionUtils.newInstance(Text.class, conf);
+    return (Text) ReflectionUtils.newInstance(Text.class, conf);
   }
 
   /**
    * Creates a new instance of the <code>BytesWritable</code> object for the key
    */
   public BytesWritable createValue() {
-    return (BytesWritable)ReflectionUtils.newInstance(BytesWritable.class, conf);
+    return (BytesWritable) ReflectionUtils.newInstance(BytesWritable.class,
+        conf);
   }
 
   /**
@@ -137,63 +148,64 @@
    * 
    * @return The long of the current position in the file.
    */
-  public long getPos()
-    throws IOException {
+  public long getPos() throws IOException {
     return in.getPos();
   }
 
   /**
-   * Returns the percentage of progress in processing the file.  This will be
+   * Returns the percentage of progress in processing the file. This will be
    * represented as a float from 0 to 1 with 1 being 100% completed.
    * 
    * @return The percentage of progress as a float from 0 to 1.
    */
-  public float getProgress()
-    throws IOException {
-	  
+  public float getProgress() throws IOException {
+
     // if we haven't even started
     if (splitEnd == splitStart) {
       return 0.0f;
+    } else {
+      // the progress is current pos - where we started / length of the split
+      return Math.min(1.0f, (getPos() - splitStart) / (float) splitLen);
     }
-    else {
-      // the progress is current pos - where we started  / length of the split
-      return Math.min(1.0f, (getPos() - splitStart) / (float)splitLen);
-    }
   }
 
   /**
-   * <p>Returns true if the next record in the split is read into the key and 
-   * value pair.  The key will be the arc record header and the values will be
-   * the raw content bytes of the arc record.</p>
+   * <p>
+   * Returns true if the next record in the split is read into the key and value
+   * pair. The key will be the arc record header and the values will be the raw
+   * content bytes of the arc record.
+   * </p>
    * 
-   * @param key The record key
-   * @param value The record value
+   * @param key
+   *          The record key
+   * @param value
+   *          The record value
    * 
    * @return True if the next record is read.
    * 
-   * @throws IOException If an error occurs while reading the record value.
+   * @throws IOException
+   *           If an error occurs while reading the record value.
    */
-  public boolean next(Text key, BytesWritable value)
-    throws IOException {
+  public boolean next(Text key, BytesWritable value) throws IOException {
 
     try {
-      
+
       // get the starting position on the input stream
       long startRead = in.getPos();
       byte[] magicBuffer = null;
-      
+
       // we need this loop to handle false positives in reading of gzip records
       while (true) {
-        
+
         // while we haven't passed the end of the split
         if (startRead >= splitEnd) {
           return false;
         }
-        
+
         // scanning for the gzip header
         boolean foundStart = false;
         while (!foundStart) {
-          
+
           // start at the current file position and scan for 1K at time, break
           // if there is no more to read
           startRead = in.getPos();
@@ -202,13 +214,13 @@
           if (read < 0) {
             break;
           }
-          
-          // scan the byte array for the gzip header magic number.  This happens
+
+          // scan the byte array for the gzip header magic number. This happens
           // byte by byte
           for (int i = 0; i < read - 1; i++) {
             byte[] testMagic = new byte[2];
-            System.arraycopy(magicBuffer, i, testMagic, 0, 2);            
-            if (isMagic(testMagic)) {              
+            System.arraycopy(magicBuffer, i, testMagic, 0, 2);
+            if (isMagic(testMagic)) {
               // set the next start to the current gzip header
               startRead += i;
               foundStart = true;
@@ -216,14 +228,14 @@
             }
           }
         }
-        
+
         // seek to the start of the gzip header
         in.seek(startRead);
         ByteArrayOutputStream baos = null;
         int totalRead = 0;
 
         try {
-          
+
           // read 4K of the gzip at a time putting into a byte array
           byte[] buffer = new byte[4096];
           GZIPInputStream zin = new GZIPInputStream(in);
@@ -233,9 +245,8 @@
             baos.write(buffer, 0, gzipRead);
             totalRead += gzipRead;
           }
-        }
-        catch (Exception e) {
-          
+        } catch (Exception e) {
+
           // there are times we get false positives where the gzip header exists
           // but it is not an actual gzip record, so we ignore it and start
           // over seeking
@@ -248,7 +259,7 @@
 
         // change the output stream to a byte array
         byte[] content = baos.toByteArray();
-        
+
         // the first line of the raw content in arc files is the header
         int eol = 0;
         for (int i = 0; i < content.length; i++) {
@@ -257,34 +268,33 @@
             break;
           }
         }
-        
+
         // create the header and the raw content minus the header
         String header = new String(content, 0, eol).trim();
         byte[] raw = new byte[(content.length - eol) - 1];
         System.arraycopy(content, eol + 1, raw, 0, raw.length);
-        
+
         // populate key and values with the header and raw content.
-        Text keyText = (Text)key;
+        Text keyText = (Text) key;
         keyText.set(header);
-        BytesWritable valueBytes = (BytesWritable)value;
+        BytesWritable valueBytes = (BytesWritable) value;
         valueBytes.set(raw, 0, raw.length);
 
-        // TODO: It would be best to start at the end of the gzip read but 
-        // the bytes read in gzip don't match raw bytes in the file so we 
-        // overshoot the next header.  With this current method you get
+        // TODO: It would be best to start at the end of the gzip read but
+        // the bytes read in gzip don't match raw bytes in the file so we
+        // overshoot the next header. With this current method you get
         // some false positives but don't miss records.
         if (startRead + 1 < fileLen) {
           in.seek(startRead + 1);
         }
-        
+
         // populated the record, now return
         return true;
       }
+    } catch (Exception e) {
+      LOG.error(StringUtils.stringifyException(e));
     }
-    catch (Exception e) {
-      LOG.equals(StringUtils.stringifyException(e));      
-    }
-    
+
     // couldn't populate the record or there is no next record to read
     return false;
   }
Index: src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java	(working copy)
@@ -62,18 +62,22 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * <p>The <code>ArcSegmentCreator</code> is a replacement for fetcher that will
- * take arc files as input and produce a nutch segment as output.</p>
+ * <p>
+ * The <code>ArcSegmentCreator</code> is a replacement for fetcher that will
+ * take arc files as input and produce a nutch segment as output.
+ * </p>
  * 
- * <p>Arc files are tars of compressed gzips which are produced by both the
- * internet archive project and the grub distributed crawler project.</p>
+ * <p>
+ * Arc files are tars of compressed gzips which are produced by both the
+ * internet archive project and the grub distributed crawler project.
+ * </p>
  * 
  */
-public class ArcSegmentCreator
-  extends Configured
-  implements Tool, Mapper<Text, BytesWritable, Text, NutchWritable> {
+public class ArcSegmentCreator extends Configured implements Tool,
+    Mapper<Text, BytesWritable, Text, NutchWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcSegmentCreator.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcSegmentCreator.class);
   public static final String URL_VERSION = "arc.url.version";
   private JobConf jobConf;
   private URLFilters urlFilters;
@@ -89,7 +93,9 @@
   }
 
   /**
-   * <p>Constructor that sets the job configuration.</p>
+   * <p>
+   * Constructor that sets the job configuration.
+   * </p>
    * 
    * @param conf
    */
@@ -105,17 +111,19 @@
   public static synchronized String generateSegmentName() {
     try {
       Thread.sleep(1000);
+    } catch (Throwable t) {
     }
-    catch (Throwable t) {
-    }
     return sdf.format(new Date(System.currentTimeMillis()));
   }
 
   /**
-   * <p>Configures the job.  Sets the url filters, scoring filters, url normalizers
-   * and other relevant data.</p>
+   * <p>
+   * Configures the job. Sets the url filters, scoring filters, url normalizers
+   * and other relevant data.
+   * </p>
    * 
-   * @param job The job configuration.
+   * @param job
+   *          The job configuration.
    */
   public void configure(JobConf job) {
 
@@ -133,23 +141,31 @@
   }
 
   /**
-   * <p>Parses the raw content of a single record to create output.  This method
-   * is almost the same as the {@link org.apache.nutch.Fetcher#output} method in
-   * terms of processing and output.  
+   * <p>
+   * Parses the raw content of a single record to create output. This method is
+   * almost the same as the {@link org.apache.nutch.Fetcher#output} method in
+   * terms of processing and output.
    * 
-   * @param output  The job output collector.
-   * @param segmentName The name of the segment to create.
-   * @param key The url of the record.
-   * @param datum The CrawlDatum of the record.
-   * @param content The raw content of the record
-   * @param pstatus The protocol status
-   * @param status The fetch status.
+   * @param output
+   *          The job output collector.
+   * @param segmentName
+   *          The name of the segment to create.
+   * @param key
+   *          The url of the record.
+   * @param datum
+   *          The CrawlDatum of the record.
+   * @param content
+   *          The raw content of the record
+   * @param pstatus
+   *          The protocol status
+   * @param status
+   *          The fetch status.
    * 
    * @return The result of the parse in a ParseStatus object.
    */
-  private ParseStatus output(OutputCollector<Text, NutchWritable> output, String segmentName,
-    Text key, CrawlDatum datum, Content content, ProtocolStatus pstatus,
-    int status) {
+  private ParseStatus output(OutputCollector<Text, NutchWritable> output,
+      String segmentName, Text key, CrawlDatum datum, Content content,
+      ProtocolStatus pstatus, int status) {
 
     // set the fetch status and the fetch time
     datum.setStatus(status);
@@ -165,8 +181,7 @@
       // add score to content metadata so that ParseSegment can pick it up.
       try {
         scfilters.passScoreBeforeParsing(key, datum, content);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
           LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
@@ -177,16 +192,15 @@
 
         // parse the content
         parseResult = this.parseUtil.parse(content);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         LOG.warn("Error parsing: " + key + ": "
-          + StringUtils.stringifyException(e));
+            + StringUtils.stringifyException(e));
       }
 
       // set the content signature
       if (parseResult == null) {
         byte[] signature = SignatureFactory.getSignature(getConf()).calculate(
-          content, new ParseStatus().getEmptyParse(getConf()));
+            content, new ParseStatus().getEmptyParse(getConf()));
         datum.setSignature(signature);
       }
 
@@ -195,7 +209,7 @@
         output.collect(key, new NutchWritable(content));
 
         if (parseResult != null) {
-          for (Entry <Text, Parse> entry : parseResult) {
+          for (Entry<Text, Parse> entry : parseResult) {
             Text url = entry.getKey();
             Parse parse = entry.getValue();
             ParseStatus parseStatus = parse.getData().getStatus();
@@ -205,36 +219,35 @@
               parse = parseStatus.getEmptyParse(getConf());
             }
 
-            // Calculate page signature. 
-            byte[] signature = SignatureFactory.getSignature(getConf()).calculate(
-              content, parse);
+            // Calculate page signature.
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, parse);
             // Ensure segment name and score are in parseData metadata
-            parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY,
-              segmentName);
-            parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY,
-              StringUtil.toHexString(signature));
+            parse.getData().getContentMeta()
+                .set(Nutch.SEGMENT_NAME_KEY, segmentName);
+            parse.getData().getContentMeta()
+                .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
             // Pass fetch time to content meta
-            parse.getData().getContentMeta().set(Nutch.FETCH_TIME_KEY,
-              Long.toString(datum.getFetchTime()));
+            parse.getData().getContentMeta()
+                .set(Nutch.FETCH_TIME_KEY, Long.toString(datum.getFetchTime()));
             if (url.equals(key))
               datum.setSignature(signature);
             try {
               scfilters.passScoreAfterParsing(url, content, parse);
-            }
-            catch (Exception e) {
+            } catch (Exception e) {
               if (LOG.isWarnEnabled()) {
                 e.printStackTrace(LogUtil.getWarnStream(LOG));
                 LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
               }
             }
             output.collect(url, new NutchWritable(new ParseImpl(new ParseText(
-              parse.getText()), parse.getData(), parse.isCanonical())));
+                parse.getText()), parse.getData(), parse.isCanonical())));
           }
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         if (LOG.isErrorEnabled()) {
-          LOG.error("ArcSegmentCreator caught:" + StringUtils.stringifyException(e));
+          LOG.error("ArcSegmentCreator caught:"
+              + StringUtils.stringifyException(e));
         }
       }
 
@@ -246,42 +259,51 @@
         }
       }
     }
-    
+
     return null;
   }
 
   /**
-   * <p>Logs any error that occurs during conversion.</p>
+   * <p>
+   * Logs any error that occurs during conversion.
+   * </p>
    * 
-   * @param url The url we are parsing.
-   * @param t The error that occured.
+   * @param url
+   *          The url we are parsing.
+   * @param t
+   *          The error that occurred.
    */
   private void logError(Text url, Throwable t) {
     if (LOG.isInfoEnabled()) {
-      LOG.info("Conversion of " + url + " failed with: " + 
-          StringUtils.stringifyException(t));
+      LOG.info("Conversion of " + url + " failed with: "
+          + StringUtils.stringifyException(t));
     }
   }
 
   /**
-   * <p>Runs the Map job to translate an arc record into output for Nutch 
-   * segments.</p>
+   * <p>
+   * Runs the Map job to translate an arc record into output for Nutch segments.
+   * </p>
    * 
-   * @param key The arc record header.
-   * @param bytes The arc record raw content bytes.
-   * @param output The output collecter.
-   * @param reporter The progress reporter.
+   * @param key
+   *          The arc record header.
+   * @param bytes
+   *          The arc record raw content bytes.
+   * @param output
+   *          The output collector.
+   * @param reporter
+   *          The progress reporter.
    */
   public void map(Text key, BytesWritable bytes,
-    OutputCollector<Text, NutchWritable> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
 
     String[] headers = key.toString().split("\\s+");
     String urlStr = headers[0];
     String version = headers[2];
     String contentType = headers[3];
-    
-    // arcs start with a file description.  for now we ignore this as it is not
+
+    // arcs start with a file description. for now we ignore this as it is not
     // a content record
     if (urlStr.startsWith("filedesc://")) {
       LOG.info("Ignoring file header: " + urlStr);
@@ -289,18 +311,17 @@
     }
     LOG.info("Processing: " + urlStr);
 
-    // get the raw  bytes from the arc file, create a new crawldatum
+    // get the raw bytes from the arc file, create a new crawldatum
     Text url = new Text();
     CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, interval,
-      1.0f);
+        1.0f);
     String segmentName = getConf().get(Nutch.SEGMENT_NAME_KEY);
 
     // normalize and filter the urls
     try {
       urlStr = normalizers.normalize(urlStr, URLNormalizers.SCOPE_FETCHER);
       urlStr = urlFilters.filter(urlStr); // filter the url
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("Skipping " + url + ":" + e);
       }
@@ -315,37 +336,41 @@
 
         // set the protocol status to success and the crawl status to success
         // create the content from the normalized url and the raw bytes from
-        // the arc file,  TODO: currently this doesn't handle text of errors
+        // the arc file, TODO: currently this doesn't handle text of error
         // pages (i.e. 404, etc.). We assume we won't get those.
         ProtocolStatus status = ProtocolStatus.STATUS_SUCCESS;
         Content content = new Content(urlStr, urlStr, bytes.get(), contentType,
-          new Metadata(), getConf());
-        
+            new Metadata(), getConf());
+
         // set the url version into the metadata
         content.getMetadata().set(URL_VERSION, version);
         ParseStatus pstatus = null;
         pstatus = output(output, segmentName, url, datum, content, status,
-          CrawlDatum.STATUS_FETCH_SUCCESS);
+            CrawlDatum.STATUS_FETCH_SUCCESS);
         reporter.progress();
-      }
-      catch (Throwable t) { // unexpected exception
+      } catch (Throwable t) { // unexpected exception
         logError(url, t);
         output(output, segmentName, url, datum, null, null,
-          CrawlDatum.STATUS_FETCH_RETRY);
+            CrawlDatum.STATUS_FETCH_RETRY);
       }
     }
   }
 
   /**
-   * <p>Creates the arc files to segments job.</p>
+   * <p>
+   * Creates the arc files to segments job.
+   * </p>
    * 
-   * @param arcFiles The path to the directory holding the arc files
-   * @param segmentsOutDir The output directory for writing the segments
+   * @param arcFiles
+   *          The path to the directory holding the arc files
+   * @param segmentsOutDir
+   *          The output directory for writing the segments
    * 
-   * @throws IOException If an IO error occurs while running the job.
+   * @throws IOException
+   *           If an IO error occurs while running the job.
    */
   public void createSegments(Path arcFiles, Path segmentsOutDir)
-    throws IOException {
+      throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -369,17 +394,17 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("ArcSegmentCreator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ArcSegmentCreator: finished at " + sdf.format(end)
+        + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String args[])
-    throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new ArcSegmentCreator(), args);
+  public static void main(String args[]) throws Exception {
+    int res = ToolRunner.run(NutchConfiguration.create(),
+        new ArcSegmentCreator(), args);
     System.exit(res);
   }
 
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     String usage = "Usage: ArcSegmentCreator <arcFiles> <segmentsOutDir>";
 
@@ -396,8 +421,7 @@
       // create the segments from the arc files
       createSegments(arcFiles, segmentsOutDir);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ArcSegmentCreator: " + StringUtils.stringifyException(e));
       return -1;
     }
Index: src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(working copy)
@@ -30,21 +30,22 @@
 /**
  * A input format the reads arc files.
  */
-public class ArcInputFormat
-  extends FileInputFormat<Text, BytesWritable> {
+public class ArcInputFormat extends FileInputFormat<Text, BytesWritable> {
 
   /**
    * Returns the <code>RecordReader</code> for reading the arc file.
    * 
-   * @param split The InputSplit of the arc file to process.
-   * @param job The job configuration.
-   * @param reporter The progress reporter.
+   * @param split
+   *          The InputSplit of the arc file to process.
+   * @param job
+   *          The job configuration.
+   * @param reporter
+   *          The progress reporter.
    */
   public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split,
-      JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
     reporter.setStatus(split.toString());
-    return new ArcRecordReader(job, (FileSplit)split);
+    return new ArcRecordReader(job, (FileSplit) split);
   }
 
 }
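A minimal sketch, not part of this patch, of how ArcInputFormat and the ArcRecordReader above can be wired into an old-style mapred job. The class name, the paths taken from args, and the use of IdentityMapper are illustrative assumptions rather than Nutch code; ArcSegmentCreator earlier in this patch shows the real consumer.

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.BytesWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.FileOutputFormat;
  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.SequenceFileOutputFormat;
  import org.apache.hadoop.mapred.lib.IdentityMapper;
  import org.apache.nutch.tools.arc.ArcInputFormat;
  import org.apache.nutch.util.NutchConfiguration;

  public class ArcDumpSketch {
    public static void main(String[] args) throws Exception {
      JobConf job = new JobConf(NutchConfiguration.create());
      job.setJobName("arc dump sketch");
      // args[0]: directory of .arc.gz files; ArcRecordReader emits the ARC
      // header line as the key and the raw record bytes as the value.
      FileInputFormat.addInputPath(job, new Path(args[0]));
      job.setInputFormat(ArcInputFormat.class);
      // Pass records straight through to a sequence file; a real job would
      // parse them, as ArcSegmentCreator does.
      job.setMapperClass(IdentityMapper.class);
      job.setNumReduceTasks(0);
      job.setOutputFormat(SequenceFileOutputFormat.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(BytesWritable.class);
      FileOutputFormat.setOutputPath(job, new Path(args[1]));
      JobClient.runJob(job);
    }
  }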
Index: src/java/org/apache/nutch/tools/ResolveUrls.java
===================================================================
--- src/java/org/apache/nutch/tools/ResolveUrls.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/ResolveUrls.java	(working copy)
@@ -59,8 +59,7 @@
   /**
    * A Thread which gets the ip address of a single host by name.
    */
-  private static class ResolverThread
-    extends Thread {
+  private static class ResolverThread extends Thread {
 
     private String url = null;
 
@@ -74,14 +73,13 @@
       String host = URLUtil.getHost(url);
       long start = System.currentTimeMillis();
       try {
-        
-        // get the address by name and if no error is thrown then it 
+
+        // get the address by name and if no error is thrown then it
         // is resolved successfully
         InetAddress ia = InetAddress.getByName(host);
         LOG.info("Resolved: " + host);
         numResolved.incrementAndGet();
-      }
-      catch (Exception uhe) {
+      } catch (Exception uhe) {
         LOG.info("Error Resolving: " + host);
         numErrored.incrementAndGet();
       }
@@ -93,8 +91,8 @@
   }
 
   /**
-   * Creates a thread pool for resolving urls.  Reads in the url file on the
-   * local filesystem.  For each url it attempts to resolve it keeping a total
+   * Creates a thread pool for resolving urls. Reads in the url file on the
+   * local filesystem. For each url it attempts to resolve it keeping a total
    * account of the number resolved, errored, and the amount of time.
    */
   public void resolveUrls() {
@@ -103,13 +101,14 @@
 
       // create a thread pool with a fixed number of threads
       pool = Executors.newFixedThreadPool(numThreads);
-      
-      // read in the urls file and loop through each line, one url per line
+
+      // read in the urls file and loop through each line, one url per
+      // line
       BufferedReader buffRead = new BufferedReader(new FileReader(new File(
-        urlsFile)));
+          urlsFile)));
       String urlStr = null;
       while ((urlStr = buffRead.readLine()) != null) {
-        
+
         // spin up a resolver thread per url
         LOG.info("Starting: " + urlStr);
         pool.execute(new ResolverThread(urlStr));
@@ -119,9 +118,8 @@
       // the thread pool to give urls time to finish resolving
       buffRead.close();
       pool.awaitTermination(60, TimeUnit.SECONDS);
-    }
-    catch (Exception e) {
-      
+    } catch (Exception e) {
+
       // on error shutdown the thread pool immediately
       pool.shutdownNow();
       LOG.info(StringUtils.stringifyException(e));
@@ -129,15 +127,16 @@
 
     // shutdown the thread pool and log totals
     pool.shutdown();
-    LOG.info("Total: " + numTotal.get() + ", Resovled: "
-      + numResolved.get() + ", Errored: " + numErrored.get()
-      + ", Average Time: " + totalTime.get() / numTotal.get());
+    LOG.info("Total: " + numTotal.get() + ", Resovled: " + numResolved.get()
+        + ", Errored: " + numErrored.get() + ", Average Time: "
+        + totalTime.get() / numTotal.get());
   }
 
   /**
    * Create a new ResolveUrls with a file from the local file system.
-   *
-   * @param urlsFile The local urls file, one url per line.
+   * 
+   * @param urlsFile
+   *          The local urls file, one url per line.
    */
   public ResolveUrls(String urlsFile) {
     this(urlsFile, 100);
@@ -145,10 +144,12 @@
 
   /**
    * Create a new ResolveUrls with a urls file and a number of threads for the
-   * Thread pool.  Number of threads is 100 by default.
+   * Thread pool. Number of threads is 100 by default.
    * 
-   * @param urlsFile The local urls file, one url per line.
-   * @param numThreads The number of threads used to resolve urls in parallel.
+   * @param urlsFile
+   *          The local urls file, one url per line.
+   * @param numThreads
+   *          The number of threads used to resolve urls in parallel.
    */
   public ResolveUrls(String urlsFile, int numThreads) {
     this.urlsFile = urlsFile;
@@ -161,12 +162,12 @@
   public static void main(String[] args) {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option urlOpts = OptionBuilder.withArgName("urls").hasArg().withDescription(
-      "the urls file to check").create("urls");
-    Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs().withDescription(
-      "the number of threads to use").create("numThreads");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option urlOpts = OptionBuilder.withArgName("urls").hasArg()
+        .withDescription("the urls file to check").create("urls");
+    Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs()
+        .withDescription("the number of threads to use").create("numThreads");
     options.addOption(helpOpts);
     options.addOption(urlOpts);
     options.addOption(numThreadOpts);
@@ -191,8 +192,7 @@
       }
       ResolveUrls resolve = new ResolveUrls(urls, numThreads);
       resolve.resolveUrls();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ResolveUrls: " + StringUtils.stringifyException(e));
     }
   }
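As a usage note, the constructors above also allow driving the resolver programmatically; a minimal sketch with a placeholder file name:

  // Resolve every host in urls.txt (one URL per line) using 50 threads.
  new ResolveUrls("urls.txt", 50).resolveUrls();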
Index: src/java/org/apache/nutch/tools/DmozParser.java
===================================================================
--- src/java/org/apache/nutch/tools/DmozParser.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/DmozParser.java	(working copy)
@@ -36,16 +36,15 @@
 import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /** Utility that converts DMOZ RDF into a flat file of URLs to be injected. */
 public class DmozParser {
   public static final Logger LOG = LoggerFactory.getLogger(DmozParser.class);
-  
-    long pages = 0;
 
+  long pages = 0;
+
   /**
-   * This filter fixes characters that might offend our parser.
-   * This lets us be tolerant of errors that might appear in the input XML.
+   * This filter fixes characters that might offend our parser. This lets us be
+   * tolerant of errors that might appear in the input XML.
    */
   private static class XMLCharFilter extends FilterReader {
     private boolean lastBad = false;
@@ -57,9 +56,9 @@
     public int read() throws IOException {
       int c = in.read();
       int value = c;
-      if (c != -1 && !(XMLChar.isValid(c)))     // fix invalid characters
+      if (c != -1 && !(XMLChar.isValid(c))) // fix invalid characters
         value = 'X';
-      else if (lastBad && c == '<') {           // fix mis-matched brackets
+      else if (lastBad && c == '<') { // fix mis-matched brackets
         in.mark(1);
         if (in.read() != '/')
           value = 'X';
@@ -70,37 +69,35 @@
       return value;
     }
 
-    public int read(char[] cbuf, int off, int len)
-      throws IOException {
+    public int read(char[] cbuf, int off, int len) throws IOException {
       int n = in.read(cbuf, off, len);
       if (n != -1) {
         for (int i = 0; i < n; i++) {
-          char c = cbuf[off+i];
+          char c = cbuf[off + i];
           char value = c;
-          if (!(XMLChar.isValid(c)))            // fix invalid characters
+          if (!(XMLChar.isValid(c))) // fix invalid characters
             value = 'X';
-          else if (lastBad && c == '<') {       // fix mis-matched brackets
-            if (i != n-1 && cbuf[off+i+1] != '/')
+          else if (lastBad && c == '<') { // fix mis-matched brackets
+            if (i != n - 1 && cbuf[off + i + 1] != '/')
               value = 'X';
           }
           lastBad = (c == 65533);
-          cbuf[off+i] = value;
+          cbuf[off + i] = value;
         }
       }
       return n;
     }
   }
 
-
   /**
-   * The RDFProcessor receives tag messages during a parse
-   * of RDF XML data.  We build whatever structures we need
-   * from these messages.
+   * The RDFProcessor receives tag messages during a parse of RDF XML data. We
+   * build whatever structures we need from these messages.
    */
   private class RDFProcessor extends DefaultHandler {
     String curURL = null, curSection = null;
-    boolean titlePending = false, descPending = false, insideAdultSection = false;
-    Pattern topicPattern = null; 
+    boolean titlePending = false, descPending = false,
+        insideAdultSection = false;
+    Pattern topicPattern = null;
     StringBuffer title = new StringBuffer(), desc = new StringBuffer();
     XMLReader reader;
     int subsetDenom;
@@ -109,10 +106,12 @@
     Locator location;
 
     /**
-     * Pass in an XMLReader, plus a flag as to whether we 
-     * should include adult material.
+     * Pass in an XMLReader, plus a flag as to whether we should include adult
+     * material.
      */
-    public RDFProcessor(XMLReader reader, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern) throws IOException {
+    public RDFProcessor(XMLReader reader, int subsetDenom,
+        boolean includeAdult, int skew, Pattern topicPattern)
+        throws IOException {
       this.reader = reader;
       this.subsetDenom = subsetDenom;
       this.includeAdult = includeAdult;
@@ -128,20 +127,21 @@
     /**
      * Start of an XML elt
      */
-    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
+    public void startElement(String namespaceURI, String localName,
+        String qName, Attributes atts) throws SAXException {
       if ("Topic".equals(qName)) {
         curSection = atts.getValue("r:id");
       } else if ("ExternalPage".equals(qName)) {
         // Porn filter
-        if ((! includeAdult) && curSection.startsWith("Top/Adult")) {
+        if ((!includeAdult) && curSection.startsWith("Top/Adult")) {
           return;
         }
-          
+
         if (topicPattern != null && !topicPattern.matcher(curSection).matches()) {
           return;
         }
 
-        // Subset denominator filter.  
+        // Subset denominator filter.
         // Only emit with a chance of 1/denominator.
         String url = atts.getValue("about");
         int hashValue = MD5Hash.digest(url).hashCode();
@@ -174,18 +174,18 @@
      * Termination of XML elt
      */
     public void endElement(String namespaceURI, String localName, String qName)
-      throws SAXException {
+        throws SAXException {
       if (curURL != null) {
         if ("ExternalPage".equals(qName)) {
           //
-          // Inc the number of pages, insert the page, and 
+          // Inc the number of pages, insert the page, and
           // possibly print status.
           //
-          System.out.println(curURL); 
+          System.out.println(curURL);
           pages++;
 
           //
-          // Clear out the link text.  This is what
+          // Clear out the link text. This is what
           // you would use for adding to the linkdb.
           //
           if (title.length() > 0) {
@@ -220,15 +220,13 @@
     }
 
     /**
-     * From time to time the Parser will set the "current location"
-     * by calling this function.  It's useful for emitting locations
-     * for error messages.
+     * From time to time the Parser will set the "current location" by calling
+     * this function. It's useful for emitting locations for error messages.
      */
     public void setDocumentLocator(Locator locator) {
       location = locator;
     }
 
-
     //
     // Interface ErrorHandler
     //
@@ -249,12 +247,12 @@
     public void errorError(SAXParseException spe) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Fatal err: " + spe.toString() + ": " + spe.getMessage());
-        LOG.error("Last known line is " + location.getLineNumber() +
-                  ", column " + location.getColumnNumber());
+        LOG.error("Last known line is " + location.getLineNumber()
+            + ", column " + location.getColumnNumber());
         spe.printStackTrace(LogUtil.getErrorStream(LOG));
       }
     }
-        
+
     /**
      * Emit exception warning message
      */
@@ -267,34 +265,33 @@
   }
 
   /**
-   * Iterate through all the items in this structured DMOZ file.
-   * Add each URL to the web db.
+   * Iterate through all the items in this structured DMOZ file. Add each URL to
+   * the web db.
    */
   public void parseDmozFile(File dmozFile, int subsetDenom,
-                            boolean includeAdult,
-                            int skew,
-                            Pattern topicPattern)
+      boolean includeAdult, int skew, Pattern topicPattern)
 
-    throws IOException, SAXException, ParserConfigurationException {
+  throws IOException, SAXException, ParserConfigurationException {
 
     SAXParserFactory parserFactory = SAXParserFactory.newInstance();
     SAXParser parser = parserFactory.newSAXParser();
     XMLReader reader = parser.getXMLReader();
 
     // Create our own processor to receive SAX events
-    RDFProcessor rp =
-      new RDFProcessor(reader, subsetDenom, includeAdult,
-                       skew, topicPattern);
+    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew,
+        topicPattern);
     reader.setContentHandler(rp);
     reader.setErrorHandler(rp);
     LOG.info("skew = " + rp.hashSkew);
 
     //
-    // Open filtered text stream.  The TextFilter makes sure that
+    // Open filtered text stream. The XMLCharFilter makes sure that
     // only appropriate XML-approved Text characters are received.
     // Any non-conforming characters are silently skipped.
     //
-    XMLCharFilter in = new XMLCharFilter(new BufferedReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
+    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
+        new InputStreamReader(new BufferedInputStream(new FileInputStream(
+            dmozFile)), "UTF-8")));
     try {
       InputSource is = new InputSource(in);
       reader.parse(is);
@@ -309,18 +306,17 @@
     }
   }
 
-  private static void addTopicsFromFile(String topicFile,
-                                        Vector<String> topics)
-  throws IOException {
+  private static void addTopicsFromFile(String topicFile, Vector<String> topics)
+      throws IOException {
     BufferedReader in = null;
     try {
-      in = new BufferedReader(new InputStreamReader(new FileInputStream(topicFile), "UTF-8"));
+      in = new BufferedReader(new InputStreamReader(new FileInputStream(
+          topicFile), "UTF-8"));
       String line = null;
       while ((line = in.readLine()) != null) {
         topics.addElement(new String(line));
       }
-    } 
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error(e.toString());
         e.printStackTrace(LogUtil.getErrorStream(LOG));
@@ -330,18 +326,19 @@
       in.close();
     }
   }
-    
+
   /**
-   * Command-line access.  User may add URLs via a flat text file
-   * or the structured DMOZ file.  By default, we ignore Adult
-   * material (as categorized by DMOZ).
+   * Command-line access. User may add URLs via a flat text file or the
+   * structured DMOZ file. By default, we ignore Adult material (as categorized
+   * by DMOZ).
    */
   public static void main(String argv[]) throws Exception {
     if (argv.length < 1) {
-      System.err.println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
+      System.err
+          .println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
       return;
     }
-    
+
     //
     // Parse the command line, figure out what kind of
     // URL file we need to load
@@ -350,9 +347,9 @@
     int skew = 0;
     String dmozFile = argv[0];
     boolean includeAdult = false;
-    Pattern topicPattern = null; 
+    Pattern topicPattern = null;
     Vector<String> topics = new Vector<String>();
-    
+
     Configuration conf = NutchConfiguration.create();
     FileSystem fs = FileSystem.get(conf);
     try {
@@ -360,16 +357,16 @@
         if ("-includeAdultMaterial".equals(argv[i])) {
           includeAdult = true;
         } else if ("-subset".equals(argv[i])) {
-          subsetDenom = Integer.parseInt(argv[i+1]);
+          subsetDenom = Integer.parseInt(argv[i + 1]);
           i++;
         } else if ("-topic".equals(argv[i])) {
-          topics.addElement(argv[i+1]); 
+          topics.addElement(argv[i + 1]);
           i++;
         } else if ("-topicFile".equals(argv[i])) {
-          addTopicsFromFile(argv[i+1], topics);
+          addTopicsFromFile(argv[i + 1], topics);
           i++;
         } else if ("-skew".equals(argv[i])) {
-          skew = Integer.parseInt(argv[i+1]);
+          skew = Integer.parseInt(argv[i + 1]);
           i++;
         }
       }
@@ -377,21 +374,21 @@
       DmozParser parser = new DmozParser();
 
       if (!topics.isEmpty()) {
-        String regExp = new String("^("); 
+        String regExp = new String("^(");
         int j = 0;
-        for ( ; j < topics.size() - 1; ++j) {
+        for (; j < topics.size() - 1; ++j) {
           regExp = regExp.concat(topics.get(j));
           regExp = regExp.concat("|");
         }
         regExp = regExp.concat(topics.get(j));
-        regExp = regExp.concat(").*"); 
+        regExp = regExp.concat(").*");
         LOG.info("Topic selection pattern = " + regExp);
-        topicPattern = Pattern.compile(regExp); 
+        topicPattern = Pattern.compile(regExp);
       }
 
-      parser.parseDmozFile(new File(dmozFile), subsetDenom,
-                           includeAdult, skew, topicPattern);
-      
+      parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew,
+          topicPattern);
+
     } finally {
       fs.close();
     }
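As a worked example of the topic filter above: running DmozParser with -topic Top/Arts -topic Top/Science makes the loop build the selection pattern ^(Top/Arts|Top/Science).*, so only ExternalPage entries whose enclosing Topic r:id starts with one of those categories are emitted.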
Index: src/java/org/apache/nutch/tools/CrawlDBScanner.java
===================================================================
--- src/java/org/apache/nutch/tools/CrawlDBScanner.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/CrawlDBScanner.java	(working copy)
@@ -57,17 +57,21 @@
  */
 
 public class CrawlDBScanner extends Configured implements Tool,
-    Mapper<Text,CrawlDatum,Text,CrawlDatum>, Reducer<Text,CrawlDatum,Text,CrawlDatum> {
+    Mapper<Text, CrawlDatum, Text, CrawlDatum>,
+    Reducer<Text, CrawlDatum, Text, CrawlDatum> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(CrawlDBScanner.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDBScanner.class);
 
-  public CrawlDBScanner() {}
+  public CrawlDBScanner() {
+  }
 
   public CrawlDBScanner(Configuration conf) {
     setConf(conf);
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   private String regex = null;
   private String status = null;
@@ -78,11 +82,14 @@
   }
 
   public void map(Text url, CrawlDatum crawlDatum,
-      OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     // check status
     if (status != null
-        && !status.equalsIgnoreCase(CrawlDatum.getStatusName(crawlDatum.getStatus()))) return;
+        && !status.equalsIgnoreCase(CrawlDatum.getStatusName(crawlDatum
+            .getStatus())))
+      return;
 
     // if URL matched regexp dump it
     if (url.toString().matches(regex)) {
@@ -91,7 +98,8 @@
   }
 
   public void reduce(Text key, Iterator<CrawlDatum> values,
-      OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
     while (values.hasNext()) {
       CrawlDatum val = values.next();
       output.collect(key, val);
@@ -110,7 +118,8 @@
     job.setJobName("Scan : " + crawlDb + " for URLS matching : " + regex);
 
     job.set("CrawlDBScanner.regex", regex);
-    if (status != null) job.set("CrawlDBScanner.status", status);
+    if (status != null)
+      job.set("CrawlDBScanner.status", status);
 
     FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
     job.setInputFormat(SequenceFileInputFormat.class);
@@ -147,11 +156,13 @@
     }
 
     long end = System.currentTimeMillis();
-    LOG.info("CrawlDb scanner: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("CrawlDb scanner: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDBScanner(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDBScanner(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/tools/FreeGenerator.java
===================================================================
--- src/java/org/apache/nutch/tools/FreeGenerator.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/FreeGenerator.java	(working copy)
@@ -54,21 +54,22 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This tool generates fetchlists (segments to be fetched) from plain text
- * files containing one URL per line. It's useful when arbitrary URL-s need to
- * be fetched without adding them first to the CrawlDb, or during testing.
+ * This tool generates fetchlists (segments to be fetched) from plain text files
+ * containing one URL per line. It's useful when arbitrary URL-s need to be
+ * fetched without adding them first to the CrawlDb, or during testing.
  * 
  * @author Andrzej Bialecki
  */
 public class FreeGenerator extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(FreeGenerator.class);
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(FreeGenerator.class);
+
   private static final String FILTER_KEY = "free.generator.filter";
   private static final String NORMALIZE_KEY = "free.generator.normalize";
 
-  public static class FG extends MapReduceBase
-  implements Mapper<WritableComparable, Text, Text, Generator.SelectorEntry>,
-  Reducer<Text, Generator.SelectorEntry, Text, CrawlDatum> {
+  public static class FG extends MapReduceBase implements
+      Mapper<WritableComparable, Text, Text, Generator.SelectorEntry>,
+      Reducer<Text, Generator.SelectorEntry, Text, CrawlDatum> {
     private URLNormalizers normalizers = null;
     private URLFilters filters = null;
     private ScoringFilters scfilters;
@@ -86,16 +87,18 @@
         normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
       }
     }
-    
+
     Generator.SelectorEntry entry = new Generator.SelectorEntry();
 
-    public void map(WritableComparable key, Text value, OutputCollector<Text,
-        Generator.SelectorEntry> output, Reporter reporter) throws IOException {
+    public void map(WritableComparable key, Text value,
+        OutputCollector<Text, Generator.SelectorEntry> output, Reporter reporter)
+        throws IOException {
       // value is a line of text
       String urlString = value.toString();
       try {
         if (normalizers != null) {
-          urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_INJECT);
+          urlString = normalizers.normalize(urlString,
+              URLNormalizers.SCOPE_INJECT);
         }
         if (urlString != null && filters != null) {
           urlString = filters.filter(urlString);
@@ -105,7 +108,8 @@
           scfilters.injectedScore(url, datum);
         }
       } catch (Exception e) {
-        LOG.warn("Error adding url '" + value.toString() + "', skipping: " + StringUtils.stringifyException(e));
+        LOG.warn("Error adding url '" + value.toString() + "', skipping: "
+            + StringUtils.stringifyException(e));
         return;
       }
       if (urlString == null) {
@@ -120,11 +124,13 @@
     }
 
     public void reduce(Text key, Iterator<Generator.SelectorEntry> values,
-        OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
-      // pick unique urls from values - discard the reduce key due to hash collisions
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
+      // pick unique urls from values - discard the reduce key due to hash
+      // collisions
       HashMap<Text, CrawlDatum> unique = new HashMap<Text, CrawlDatum>();
       while (values.hasNext()) {
-        Generator.SelectorEntry entry = (Generator.SelectorEntry)values.next();
+        Generator.SelectorEntry entry = (Generator.SelectorEntry) values.next();
         unique.put(entry.url, entry.datum);
       }
       // output unique urls
@@ -133,15 +139,20 @@
       }
     }
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
-      System.err.println("\tinputDir\tinput directory containing one or more input files.");
-      System.err.println("\t\tEach text file contains a list of URLs, one URL per line");
-      System.err.println("\tsegmentsDir\toutput directory, where new segment will be created");
+      System.err
+          .println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
+      System.err
+          .println("\tinputDir\tinput directory containing one or more input files.");
+      System.err
+          .println("\t\tEach text file contains a list of URLs, one URL per line");
+      System.err
+          .println("\tsegmentsDir\toutput directory, where new segment will be created");
       System.err.println("\t-filter\trun current URLFilters on input URLs");
-      System.err.println("\t-normalize\trun current URLNormalizers on input URLs");
+      System.err
+          .println("\t-normalize\trun current URLNormalizers on input URLs");
       return -1;
     }
     boolean filter = false;
@@ -158,7 +169,7 @@
         }
       }
     }
-    
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("FreeGenerator: starting at " + sdf.format(start));
@@ -179,8 +190,8 @@
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(CrawlDatum.class);
     job.setOutputKeyComparatorClass(Generator.HashComparator.class);
-    FileOutputFormat.setOutputPath(job, new Path(args[1],
-        new Path(segName, CrawlDatum.GENERATE_DIR_NAME)));
+    FileOutputFormat.setOutputPath(job, new Path(args[1], new Path(segName,
+        CrawlDatum.GENERATE_DIR_NAME)));
     try {
       JobClient.runJob(job);
     } catch (Exception e) {
@@ -188,12 +199,14 @@
       return -1;
     }
     long end = System.currentTimeMillis();
-    LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
     return 0;
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new FreeGenerator(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new FreeGenerator(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/tools/Benchmark.java
===================================================================
--- src/java/org/apache/nutch/tools/Benchmark.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/Benchmark.java	(working copy)
@@ -52,13 +52,14 @@
     int res = ToolRunner.run(conf, new Benchmark(), args);
     System.exit(res);
   }
-  
+
   private static String getDate() {
-    return new SimpleDateFormat("yyyyMMddHHmmss").format
-      (new Date(System.currentTimeMillis()));
+    return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date(System
+        .currentTimeMillis()));
   }
- 
-  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
+
+  private void createSeeds(FileSystem fs, Path seedsDir, int count)
+      throws Exception {
     OutputStream os = fs.create(new Path(seedsDir, "seeds"));
     for (int i = 0; i < count; i++) {
       String url = "http://www.test-" + i + ".com/\r\n";
@@ -67,9 +68,9 @@
     os.flush();
     os.close();
   }
-  
+
   public static final class BenchmarkResults {
-    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>();
+    Map<String, Map<String, Long>> timings = new HashMap<String, Map<String, Long>>();
     List<String> runs = new ArrayList<String>();
     List<String> stages = new ArrayList<String>();
     int seeds, depth, threads;
@@ -77,7 +78,7 @@
     long topN;
     long elapsed;
     String plugins;
-    
+
     public void addTiming(String stage, String run, long timing) {
       if (!runs.contains(run)) {
         runs.add(run);
@@ -85,14 +86,14 @@
       if (!stages.contains(stage)) {
         stages.add(stage);
       }
-      Map<String,Long> t = timings.get(stage);
+      Map<String, Long> t = timings.get(stage);
       if (t == null) {
-        t = new HashMap<String,Long>();
+        t = new HashMap<String, Long>();
         timings.put(stage, t);
       }
       t.put(run, timing);
     }
-    
+
     public String toString() {
       StringBuilder sb = new StringBuilder();
       sb.append("* Plugins:\t" + plugins + "\n");
@@ -103,8 +104,9 @@
       sb.append("* Delete:\t" + delete + "\n");
       sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
       for (String stage : stages) {
-        Map<String,Long> timing = timings.get(stage);
-        if (timing == null) continue;
+        Map<String, Long> timing = timings.get(stage);
+        if (timing == null)
+          continue;
         sb.append("- stage: " + stage + "\n");
         for (String r : runs) {
           Long Time = timing.get(r);
@@ -116,15 +118,16 @@
       }
       return sb.toString();
     }
-    
+
     public List<String> getStages() {
       return stages;
     }
+
     public List<String> getRuns() {
       return runs;
     }
   }
-  
+
   public int run(String[] args) throws Exception {
     String plugins = "protocol-http|parse-tika|scoring-opic|urlfilter-regex|urlnormalizer-pass";
     int seeds = 1;
@@ -132,17 +135,24 @@
     int threads = 10;
     boolean delete = true;
     long topN = Long.MAX_VALUE;
-    
+
     if (args.length == 0) {
-      System.err.println("Usage: Benchmark [-seeds NN] [-depth NN] [-threads NN] [-keep] [-maxPerHost NN] [-plugins <regex>]");
-      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
+      System.err
+          .println("Usage: Benchmark [-seeds NN] [-depth NN] [-threads NN] [-keep] [-maxPerHost NN] [-plugins <regex>]");
+      System.err
+          .println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
       System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
-      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
-      System.err.println("\t-keep\tkeep segment data (default: delete after updatedb)");
+      System.err
+          .println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
+      System.err
+          .println("\t-keep\tkeep segment data (default: delete after updatedb)");
       System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
-      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
-      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
-      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
+      System.err.println("\tNOTE: if not specified, this is reset to: "
+          + plugins);
+      System.err
+          .println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
+      System.err
+          .println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
       return -1;
     }
     int maxPerHost = Integer.MAX_VALUE;
@@ -164,13 +174,15 @@
         return -1;
       }
     }
-    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, delete, plugins);
+    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN,
+        delete, plugins);
     System.out.println(res);
     return 0;
   }
-  
-  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
-        long topN, boolean delete, String plugins) throws Exception {
+
+  public BenchmarkResults benchmark(int seeds, int depth, int threads,
+      int maxPerHost, long topN, boolean delete, String plugins)
+      throws Exception {
     Configuration conf = getConf();
     conf.set("http.proxy.host", "localhost");
     conf.setInt("http.proxy.port", 8181);
@@ -180,11 +192,12 @@
       conf.set("plugin.includes", plugins);
     }
     conf.setInt(Generator.GENERATOR_MAX_COUNT, maxPerHost);
-    conf.set(Generator.GENERATOR_COUNT_MODE, Generator.GENERATOR_COUNT_VALUE_HOST);
-    JobConf job = new NutchJob(getConf());    
+    conf.set(Generator.GENERATOR_COUNT_MODE,
+        Generator.GENERATOR_COUNT_VALUE_HOST);
+    JobConf job = new NutchJob(getConf());
     FileSystem fs = FileSystem.get(job);
-    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
-            "bench-" + System.currentTimeMillis());
+    Path dir = new Path(getConf().get("hadoop.tmp.dir"), "bench-"
+        + System.currentTimeMillis());
     fs.mkdirs(dir);
     Path rootUrlDir = new Path(dir, "seed");
     fs.mkdirs(rootUrlDir);
@@ -194,7 +207,7 @@
       LOG.info("crawl started in: " + dir);
       LOG.info("rootUrlDir = " + rootUrlDir);
       LOG.info("threads = " + threads);
-      LOG.info("depth = " + depth);      
+      LOG.info("depth = " + depth);
     }
     BenchmarkResults res = new BenchmarkResults();
     res.delete = delete;
@@ -213,17 +226,17 @@
     ParseSegment parseSegment = new ParseSegment(getConf());
     CrawlDb crawlDbTool = new CrawlDb(getConf());
     LinkDb linkDbTool = new LinkDb(getConf());
-      
+
     // initialize crawlDb
     long start = System.currentTimeMillis();
     injector.inject(crawlDb, rootUrlDir);
     long delta = System.currentTimeMillis() - start;
     res.addTiming("inject", "0", delta);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new segment
+    for (i = 0; i < depth; i++) { // generate new segment
       start = System.currentTimeMillis();
-      Path[] segs = generator.generate(crawlDb, segments, -1, topN, System
-          .currentTimeMillis());
+      Path[] segs = generator.generate(crawlDb, segments, -1, topN,
+          System.currentTimeMillis());
       delta = System.currentTimeMillis() - start;
       res.addTiming("generate", i + "", delta);
       if (segs == null) {
@@ -231,12 +244,12 @@
         break;
       }
       start = System.currentTimeMillis();
-      fetcher.fetch(segs[0], threads);  // fetch it
+      fetcher.fetch(segs[0], threads); // fetch it
       delta = System.currentTimeMillis() - start;
       res.addTiming("fetch", i + "", delta);
       if (!Fetcher.isParsing(job)) {
         start = System.currentTimeMillis();
-        parseSegment.parse(segs[0]);    // parse it, if needed
+        parseSegment.parse(segs[0]); // parse it, if needed
         delta = System.currentTimeMillis() - start;
         res.addTiming("parse", i + "", delta);
       }
@@ -258,7 +271,9 @@
     if (i == 0) {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     res.elapsed = System.currentTimeMillis() - res.elapsed;
     CrawlDbReader dbreader = new CrawlDbReader();
     dbreader.processStatJob(crawlDb.toString(), conf, false);
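// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original patch: one possible way to
// drive the Benchmark tool whose reformatted usage text appears above. The
// option names (-seeds, -depth, -threads) come from that usage text; the
// driver class name and option values are hypothetical.
// ---------------------------------------------------------------------------
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.tools.Benchmark;
import org.apache.nutch.util.NutchConfiguration;

public class BenchmarkDriver {
  public static void main(String[] args) throws Exception {
    // Equivalent to: bin/nutch org.apache.nutch.tools.Benchmark -seeds 5 -depth 3 -threads 10
    String[] benchArgs = { "-seeds", "5", "-depth", "3", "-threads", "10" };
    int res = ToolRunner.run(NutchConfiguration.create(), new Benchmark(),
        benchArgs);
    System.exit(res);
  }
}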
Index: src/java/org/apache/nutch/protocol/RobotRules.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRules.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/RobotRules.java	(working copy)
@@ -35,9 +35,8 @@
   public long getCrawlDelay();
 
   /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file
-   * prohibits us from accessing the given <code>url</code>, or
-   * <code>true</code> otherwise.
+   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
+   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
    */
   public boolean isAllowed(URL url);
 
Index: src/java/org/apache/nutch/protocol/Protocol.java
===================================================================
--- src/java/org/apache/nutch/protocol/Protocol.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/Protocol.java	(working copy)
@@ -25,12 +25,11 @@
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.plugin.Pluggable;
 
-
-/** A retriever of url content.  Implemented by protocol extensions. */
+/** A retriever of url content. Implemented by protocol extensions. */
 public interface Protocol extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
-  
+
   /**
    * Property name. If in the current configuration this property is set to
    * true, protocol implementations should handle "politeness" limits
@@ -49,14 +48,18 @@
    */
   public final static String CHECK_ROBOTS = "protocol.plugin.check.robots";
 
-  /** Returns the {@link Content} for a fetchlist entry.
+  /**
+   * Returns the {@link Content} for a fetchlist entry.
    */
   ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum);
 
   /**
    * Retrieve robot rules applicable for this url.
-   * @param url url to check
-   * @param datum page datum
+   * 
+   * @param url
+   *          url to check
+   * @param datum
+   *          page datum
    * @return robot rules (specific for this url or default), never null
    */
   RobotRules getRobotRules(Text url, CrawlDatum datum);
Index: src/java/org/apache/nutch/protocol/ProtocolOutput.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolOutput.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolOutput.java	(working copy)
@@ -18,8 +18,9 @@
 package org.apache.nutch.protocol;
 
 /**
- * Simple aggregate to pass from protocol plugins both content and
- * protocol status.
+ * Simple aggregate used by protocol plugins to pass back both content and
+ * protocol status.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ProtocolOutput {
@@ -30,12 +31,12 @@
     this.content = content;
     this.status = status;
   }
-  
+
   public ProtocolOutput(Content content) {
     this.content = content;
     this.status = ProtocolStatus.STATUS_SUCCESS;
   }
-  
+
   public Content getContent() {
     return content;
   }
Index: src/java/org/apache/nutch/protocol/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatus.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolStatus.java	(working copy)
@@ -30,65 +30,76 @@
  * @author Andrzej Bialecki
  */
 public class ProtocolStatus implements Writable {
-  
+
   private final static byte VERSION = 2;
-  
+
   /** Content was retrieved without errors. */
-  public static final int SUCCESS              = 1;
+  public static final int SUCCESS = 1;
   /** Content was not retrieved. Any further errors may be indicated in args. */
-  public static final int FAILED               = 2;
-  
-  /** This protocol was not found.  Application may attempt to retry later. */
-  public static final int PROTO_NOT_FOUND      = 10;
+  public static final int FAILED = 2;
+
+  /** This protocol was not found. Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND = 10;
   /** Resource is gone. */
-  public static final int GONE                 = 11;
+  public static final int GONE = 11;
   /** Resource has moved permanently. New url should be found in args. */
-  public static final int MOVED                = 12;
+  public static final int MOVED = 12;
   /** Resource has moved temporarily. New url should be found in args. */
-  public static final int TEMP_MOVED           = 13;
+  public static final int TEMP_MOVED = 13;
   /** Resource was not found. */
-  public static final int NOTFOUND             = 14;
+  public static final int NOTFOUND = 14;
   /** Temporary failure. Application may retry immediately. */
-  public static final int RETRY                = 15;
-  /** Unspecified exception occured. Further information may be provided in args. */
-  public static final int EXCEPTION            = 16;
+  public static final int RETRY = 15;
+  /**
+   * Unspecified exception occurred. Further information may be provided in args.
+   */
+  public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
-  public static final int ACCESS_DENIED        = 17;
+  public static final int ACCESS_DENIED = 17;
   /** Access denied by robots.txt rules. */
-  public static final int ROBOTS_DENIED        = 18;
+  public static final int ROBOTS_DENIED = 18;
   /** Too many redirects. */
-  public static final int REDIR_EXCEEDED       = 19;
+  public static final int REDIR_EXCEEDED = 19;
   /** Not fetching. */
-  public static final int NOTFETCHING          = 20;
+  public static final int NOTFETCHING = 20;
   /** Unchanged since the last fetch. */
-  public static final int NOTMODIFIED          = 21;
-  /** Request was refused by protocol plugins, because it would block.
-   * The expected number of milliseconds to wait before retry may be provided
-   * in args. */
-  public static final int WOULDBLOCK           = 22;
+  public static final int NOTMODIFIED = 21;
+  /**
+   * Request was refused by protocol plugins, because it would block. The
+   * expected number of milliseconds to wait before retry may be provided in
+   * args.
+   */
+  public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
-  public static final int BLOCKED              = 23;
-   
+  public static final int BLOCKED = 23;
+
   // Useful static instances for status codes that don't usually require any
   // additional arguments.
-  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(SUCCESS);
+  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(
+      SUCCESS);
   public static final ProtocolStatus STATUS_FAILED = new ProtocolStatus(FAILED);
   public static final ProtocolStatus STATUS_GONE = new ProtocolStatus(GONE);
-  public static final ProtocolStatus STATUS_NOTFOUND = new ProtocolStatus(NOTFOUND);
+  public static final ProtocolStatus STATUS_NOTFOUND = new ProtocolStatus(
+      NOTFOUND);
   public static final ProtocolStatus STATUS_RETRY = new ProtocolStatus(RETRY);
-  public static final ProtocolStatus STATUS_ROBOTS_DENIED = new ProtocolStatus(ROBOTS_DENIED);
-  public static final ProtocolStatus STATUS_REDIR_EXCEEDED = new ProtocolStatus(REDIR_EXCEEDED);
-  public static final ProtocolStatus STATUS_NOTFETCHING = new ProtocolStatus(NOTFETCHING);
-  public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus(NOTMODIFIED);
-  public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus(WOULDBLOCK);
-  public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus(BLOCKED);
-  
+  public static final ProtocolStatus STATUS_ROBOTS_DENIED = new ProtocolStatus(
+      ROBOTS_DENIED);
+  public static final ProtocolStatus STATUS_REDIR_EXCEEDED = new ProtocolStatus(
+      REDIR_EXCEEDED);
+  public static final ProtocolStatus STATUS_NOTFETCHING = new ProtocolStatus(
+      NOTFETCHING);
+  public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus(
+      NOTMODIFIED);
+  public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus(
+      WOULDBLOCK);
+  public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus(
+      BLOCKED);
+
   private int code;
   private long lastModified;
   private String[] args;
-  
-  private static final HashMap<Integer, String> codeToName =
-    new HashMap<Integer, String>();
+
+  private static final HashMap<Integer, String> codeToName = new HashMap<Integer, String>();
   static {
     codeToName.put(new Integer(SUCCESS), "success");
     codeToName.put(new Integer(FAILED), "failed");
@@ -107,40 +118,41 @@
     codeToName.put(new Integer(WOULDBLOCK), "wouldblock");
     codeToName.put(new Integer(BLOCKED), "blocked");
   }
-  
+
   public ProtocolStatus() {
-    
+
   }
 
   public ProtocolStatus(int code, String[] args) {
     this.code = code;
     this.args = args;
   }
-  
+
   public ProtocolStatus(int code, String[] args, long lastModified) {
     this.code = code;
     this.args = args;
     this.lastModified = lastModified;
   }
-  
+
   public ProtocolStatus(int code) {
     this(code, null);
   }
-  
+
   public ProtocolStatus(int code, long lastModified) {
     this(code, null, lastModified);
   }
-  
+
   public ProtocolStatus(int code, Object message) {
     this(code, message, 0L);
   }
-  
+
   public ProtocolStatus(int code, Object message, long lastModified) {
     this.code = code;
     this.lastModified = lastModified;
-    if (message != null) this.args = new String[]{String.valueOf(message)};
+    if (message != null)
+      this.args = new String[] { String.valueOf(message) };
   }
-  
+
   public ProtocolStatus(Throwable t) {
     this(EXCEPTION, t);
   }
@@ -150,10 +162,10 @@
     res.readFields(in);
     return res;
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     byte version = in.readByte();
-    switch(version) {
+    switch (version) {
     case 1:
       code = in.readByte();
       lastModified = in.readLong();
@@ -168,10 +180,10 @@
       throw new VersionMismatchException(VERSION, version);
     }
   }
-  
+
   public void write(DataOutput out) throws IOException {
     out.writeByte(VERSION);
-    out.writeByte((byte)code);
+    out.writeByte((byte) code);
     out.writeLong(lastModified);
     if (args == null) {
       out.writeInt(-1);
@@ -183,7 +195,7 @@
   public void setArgs(String[] args) {
     this.args = args;
   }
-  
+
   public String[] getArgs() {
     return args;
   }
@@ -195,74 +207,77 @@
   public String getName() {
     return codeToName.get(this.code);
   }
-  
+
   public void setCode(int code) {
     this.code = code;
   }
-  
+
   public boolean isSuccess() {
-    return code == SUCCESS; 
+    return code == SUCCESS;
   }
-  
+
   public boolean isTransientFailure() {
-    return
-        code == ACCESS_DENIED ||
-        code == EXCEPTION ||
-        code == REDIR_EXCEEDED ||
-        code == RETRY ||
-        code == TEMP_MOVED ||
-        code == WOULDBLOCK ||
-        code == PROTO_NOT_FOUND; 
+    return code == ACCESS_DENIED || code == EXCEPTION || code == REDIR_EXCEEDED
+        || code == RETRY || code == TEMP_MOVED || code == WOULDBLOCK
+        || code == PROTO_NOT_FOUND;
   }
-  
+
   public boolean isPermanentFailure() {
-    return
-        code == FAILED ||
-        code == GONE ||
-        code == MOVED ||
-        code == NOTFOUND ||
-        code == ROBOTS_DENIED;
+    return code == FAILED || code == GONE || code == MOVED || code == NOTFOUND
+        || code == ROBOTS_DENIED;
   }
-  
+
   public String getMessage() {
-    if (args != null && args.length > 0) return args[0];
+    if (args != null && args.length > 0)
+      return args[0];
     return null;
   }
-  
+
   public void setMessage(String msg) {
-    if (args != null && args.length > 0) args[0] = msg;
-    else args = new String[] {msg};
+    if (args != null && args.length > 0)
+      args[0] = msg;
+    else
+      args = new String[] { msg };
   }
-  
+
   public long getLastModified() {
     return lastModified;
   }
-  
+
   public void setLastModified(long lastModified) {
     this.lastModified = lastModified;
   }
-  
+
   public boolean equals(Object o) {
-    if (o == null) return false;
-    if (!(o instanceof ProtocolStatus)) return false;
-    ProtocolStatus other = (ProtocolStatus)o;
-    if (this.code != other.code || this.lastModified != other.lastModified) return false;
+    if (o == null)
+      return false;
+    if (!(o instanceof ProtocolStatus))
+      return false;
+    ProtocolStatus other = (ProtocolStatus) o;
+    if (this.code != other.code || this.lastModified != other.lastModified)
+      return false;
     if (this.args == null) {
-      if (other.args == null) return true;
-      else return false;
+      if (other.args == null)
+        return true;
+      else
+        return false;
     } else {
-      if (other.args == null) return false;
-      if (other.args.length != this.args.length) return false;
+      if (other.args == null)
+        return false;
+      if (other.args.length != this.args.length)
+        return false;
       for (int i = 0; i < this.args.length; i++) {
-        if (!this.args[i].equals(other.args[i])) return false;
+        if (!this.args[i].equals(other.args[i]))
+          return false;
       }
     }
     return true;
   }
-  
+
   public String toString() {
     StringBuffer res = new StringBuffer();
-    res.append(codeToName.get(new Integer(code)) + "(" + code + "), lastModified=" + lastModified);
+    res.append(codeToName.get(new Integer(code)) + "(" + code
+        + "), lastModified=" + lastModified);
     if (args != null) {
       if (args.length == 1) {
         res.append(": " + String.valueOf(args[0]));
Index: src/java/org/apache/nutch/protocol/ProtocolFactory.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolFactory.java	(working copy)
@@ -38,7 +38,8 @@
  */
 public class ProtocolFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ProtocolFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ProtocolFactory.class);
 
   private ExtensionPoint extensionPoint;
 
@@ -59,7 +60,8 @@
    * 
    * @param urlString
    *          Url String
-   * @return The appropriate {@link Protocol} implementation for a given {@link URL}.
+   * @return The appropriate {@link Protocol} implementation for a given
+   *         {@link URL}.
    * @throws ProtocolNotFound
    *           when Protocol can not be found for urlString
    */
@@ -106,13 +108,14 @@
     }
     return null;
   }
-  
-  boolean contains(String what, String where){
-    String parts[]=where.split("[, ]");
-    for(int i=0;i<parts.length;i++) {
-      if(parts[i].equals(what)) return true;
+
+  boolean contains(String what, String where) {
+    String parts[] = where.split("[, ]");
+    for (int i = 0; i < parts.length; i++) {
+      if (parts[i].equals(what))
+        return true;
     }
     return false;
   }
-  
+
 }
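// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original patch: looking up a Protocol
// plugin for a URL via ProtocolFactory, then using the Protocol interface from
// the Protocol.java hunk above. The ProtocolFactory(Configuration) constructor
// and getProtocol(String) method are assumed from the javadoc in this hunk and
// the standard Nutch API; plugin.includes is assumed to contain protocol-http.
// ---------------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolNotFound;
import org.apache.nutch.util.NutchConfiguration;

public class ProtocolLookupDemo {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();
    ProtocolFactory factory = new ProtocolFactory(conf);
    try {
      Protocol http = factory.getProtocol("http://example.com/");
      // http.getProtocolOutput(url, datum) and http.getRobotRules(url, datum)
      // would be called next, per the Protocol interface above.
      System.out.println("Resolved plugin: " + http.getClass().getName());
    } catch (ProtocolNotFound e) {
      System.err.println(e.getMessage()); // "protocol not found for url=..."
    }
  }
}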
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -43,7 +43,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public final class Content implements Writable{
+public final class Content implements Writable {
 
   public static final String DIR_NAME = "content";
 
@@ -122,11 +122,11 @@
       metadata.readFields(in); // read meta data
       break;
     default:
-      throw new VersionMismatchException((byte)2, oldVersion);
+      throw new VersionMismatchException((byte) 2, oldVersion);
     }
 
   }
-  
+
   public final void readFields(DataInput in) throws IOException {
     metadata.clear();
     int sizeOrVersion = in.readInt();
@@ -144,14 +144,14 @@
         metadata.readFields(in);
         break;
       default:
-        throw new VersionMismatchException((byte)VERSION, (byte)version);
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
       }
     } else { // size
       byte[] compressed = new byte[sizeOrVersion];
       in.readFully(compressed, 0, compressed.length);
       ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
-      DataInput inflater =
-        new DataInputStream(new InflaterInputStream(deflated));
+      DataInput inflater = new DataInputStream(
+          new InflaterInputStream(deflated));
       readFieldsCompressed(inflater);
     }
   }
@@ -185,8 +185,9 @@
     return url;
   }
 
-  /** The base url for relative links contained in the content.
-   * Maybe be different from url if the request redirected.
+  /**
+   * The base url for relative links contained in the content. May be
+   * different from url if the request was redirected.
    */
   public String getBaseUrl() {
     return base;
@@ -201,7 +202,9 @@
     this.content = content;
   }
 
-  /** The media type of the retrieved content.
+  /**
+   * The media type of the retrieved content.
+   * 
    * @see <a href="http://www.iana.org/assignments/media-types/">
    *      http://www.iana.org/assignments/media-types/</a>
    */
@@ -259,13 +262,12 @@
     }
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
     FileSystem fs = FileSystem.get(conf);
-    
+
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
       String segment = remainingArgs[1];
Index: src/java/org/apache/nutch/protocol/ProtocolNotFound.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(working copy)
@@ -22,7 +22,7 @@
   private String url;
 
   public ProtocolNotFound(String url) {
-    this(url, "protocol not found for url="+url);
+    this(url, "protocol not found for url=" + url);
   }
 
   public ProtocolNotFound(String url, String message) {
@@ -30,5 +30,7 @@
     this.url = url;
   }
 
-  public String getUrl() { return url; }
+  public String getUrl() {
+    return url;
+  }
 }
Index: src/java/org/apache/nutch/segment/SegmentMerger.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentMerger.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentMerger.java	(working copy)
@@ -72,40 +72,47 @@
  * <p>
  * Also, it's possible to slice the resulting segment into chunks of fixed size.
  * </p>
- * <h3>Important Notes</h3>
- * <h4>Which parts are merged?</h4>
- * <p>It doesn't make sense to merge data from segments, which are at different stages
- * of processing (e.g. one unfetched segment, one fetched but not parsed, and
- * one fetched and parsed). Therefore, prior to merging, the tool will determine
- * the lowest common set of input data, and only this data will be merged.
- * This may have some unintended consequences:
- * e.g. if majority of input segments are fetched and parsed, but one of them is unfetched,
- * the tool will fall back to just merging fetchlists, and it will skip all other data
- * from all segments.</p>
+ * <h3>Important Notes</h3> <h4>Which parts are merged?</h4>
+ * <p>
+ * It doesn't make sense to merge data from segments which are at different
+ * stages of processing (e.g. one unfetched segment, one fetched but not parsed,
+ * and one fetched and parsed). Therefore, prior to merging, the tool will
+ * determine the lowest common set of input data, and only this data will be
+ * merged. This may have some unintended consequences: e.g. if the majority of input
+ * segments are fetched and parsed, but one of them is unfetched, the tool will
+ * fall back to just merging fetchlists, and it will skip all other data from
+ * all segments.
+ * </p>
  * <h4>Merging fetchlists</h4>
- * <p>Merging segments, which contain just fetchlists (i.e. prior to fetching)
- * is not recommended, because this tool (unlike the {@link org.apache.nutch.crawl.Generator}
- * doesn't ensure that fetchlist parts for each map task are disjoint.</p>
  * <p>
+ * Merging segments, which contain just fetchlists (i.e. prior to fetching) is
+ * not recommended, because this tool (unlike the
+ * {@link org.apache.nutch.crawl.Generator}) doesn't ensure that fetchlist parts
+ * for each map task are disjoint.
+ * </p>
+ * <p>
  * <h4>Duplicate content</h4>
- * Merging segments removes older content whenever possible (see below). However,
- * this is NOT the same as de-duplication, which in addition removes identical
- * content found at different URL-s. In other words, running DeleteDuplicates is
- * still necessary.
+ * Merging segments removes older content whenever possible (see below).
+ * However, this is NOT the same as de-duplication, which in addition removes
+ * identical content found at different URL-s. In other words, running
+ * DeleteDuplicates is still necessary.
  * </p>
- * <p>For some types of data (especially ParseText) it's not possible to determine
- * which version is really older. Therefore the tool always uses segment names as
- * timestamps, for all types of input data. Segment names are compared in forward lexicographic
- * order (0-9a-zA-Z), and data from segments with "higher" names will prevail.
- * It follows then that it is extremely important that segments be named in an
- * increasing lexicographic order as their creation time increases.</p>
  * <p>
+ * For some types of data (especially ParseText) it's not possible to determine
+ * which version is really older. Therefore the tool always uses segment names
+ * as timestamps, for all types of input data. Segment names are compared in
+ * forward lexicographic order (0-9a-zA-Z), and data from segments with "higher"
+ * names will prevail. It follows then that it is extremely important that
+ * segments be named in an increasing lexicographic order as their creation time
+ * increases.
+ * </p>
+ * <p>
  * <h4>Merging and indexes</h4>
  * Merged segment gets a different name. Since Indexer embeds segment names in
- * indexes, any indexes originally created for the input segments will NOT work with the
- * merged segment. Newly created merged segment(s) need to be indexed afresh.
- * This tool doesn't use existing indexes in any way, so if
- * you plan to merge segments you don't have to index them prior to merging.
+ * indexes, any indexes originally created for the input segments will NOT work
+ * with the merged segment. Newly created merged segment(s) need to be indexed
+ * afresh. This tool doesn't use existing indexes in any way, so if you plan to
+ * merge segments you don't have to index them prior to merging.
  * 
  * 
  * @author Andrzej Bialecki
@@ -113,7 +120,8 @@
 public class SegmentMerger extends Configured implements
     Mapper<Text, MetaWrapper, Text, MetaWrapper>,
     Reducer<Text, MetaWrapper, Text, MetaWrapper> {
-  private static final Logger LOG = LoggerFactory.getLogger(SegmentMerger.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SegmentMerger.class);
 
   private static final String SEGMENT_PART_KEY = "part";
   private static final String SEGMENT_SLICE_KEY = "slice";
@@ -123,20 +131,21 @@
   private SegmentMergeFilters mergeFilters = null;
   private long sliceSize = -1;
   private long curCount = 0;
-  
+
   /**
-   * Wraps inputs in an {@link MetaWrapper}, to permit merging different
-   * types in reduce and use additional metadata.
+   * Wraps inputs in a {@link MetaWrapper}, to permit merging different types
+   * in reduce and use additional metadata.
    */
   public static class ObjectInputFormat extends
-    SequenceFileInputFormat<Text, MetaWrapper> {
-    
+      SequenceFileInputFormat<Text, MetaWrapper> {
+
     @Override
-    public RecordReader<Text, MetaWrapper> getRecordReader(final InputSplit split,
-        final JobConf job, Reporter reporter) throws IOException {
+    public RecordReader<Text, MetaWrapper> getRecordReader(
+        final InputSplit split, final JobConf job, Reporter reporter)
+        throws IOException {
 
       reporter.setStatus(split.toString());
-      
+
       // find part name
       SegmentPart segmentPart;
       final String spString;
@@ -147,10 +156,10 @@
       } catch (IOException e) {
         throw new RuntimeException("Cannot identify segment:", e);
       }
-      
-      SequenceFile.Reader reader =
-        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
-      
+
+      SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job),
+          fSplit.getPath(), job);
+
       final Writable w;
       try {
         w = (Writable) reader.getValueClass().newInstance();
@@ -163,13 +172,14 @@
           // ignore
         }
       }
-      final SequenceFileRecordReader<Text,Writable> splitReader =
-        new SequenceFileRecordReader<Text,Writable>(job, (FileSplit)split);
+      final SequenceFileRecordReader<Text, Writable> splitReader = new SequenceFileRecordReader<Text, Writable>(
+          job, (FileSplit) split);
 
       try {
         return new SequenceFileRecordReader<Text, MetaWrapper>(job, fSplit) {
-          
-          public synchronized boolean next(Text key, MetaWrapper wrapper) throws IOException {
+
+          public synchronized boolean next(Text key, MetaWrapper wrapper)
+              throws IOException {
             LOG.debug("Running OIF.next()");
 
             boolean res = splitReader.next(key, w);
@@ -177,17 +187,17 @@
             wrapper.setMeta(SEGMENT_PART_KEY, spString);
             return res;
           }
-          
+
           @Override
           public synchronized void close() throws IOException {
             splitReader.close();
           }
-          
+
           @Override
           public MetaWrapper createValue() {
             return new MetaWrapper();
           }
-          
+
         };
       } catch (IOException e) {
         throw new RuntimeException("Cannot create RecordReader: ", e);
@@ -195,11 +205,14 @@
     }
   }
 
-  public static class SegmentOutputFormat extends FileOutputFormat<Text, MetaWrapper> {
+  public static class SegmentOutputFormat extends
+      FileOutputFormat<Text, MetaWrapper> {
     private static final String DEFAULT_SLICE = "default";
-    
+
     @Override
-    public RecordWriter<Text, MetaWrapper> getRecordWriter(final FileSystem fs, final JobConf job, final String name, final Progressable progress) throws IOException {
+    public RecordWriter<Text, MetaWrapper> getRecordWriter(final FileSystem fs,
+        final JobConf job, final String name, final Progressable progress)
+        throws IOException {
       return new RecordWriter<Text, MetaWrapper>() {
         MapFile.Writer c_out = null;
         MapFile.Writer f_out = null;
@@ -209,24 +222,26 @@
         SequenceFile.Writer p_out = null;
         HashMap sliceWriters = new HashMap();
         String segmentName = job.get("segment.merger.segmentName");
-        
+
         public void write(Text key, MetaWrapper wrapper) throws IOException {
           // unwrap
           SegmentPart sp = SegmentPart.parse(wrapper.getMeta(SEGMENT_PART_KEY));
-          Writable o = (Writable)wrapper.get();
+          Writable o = (Writable) wrapper.get();
           String slice = wrapper.getMeta(SEGMENT_SLICE_KEY);
           if (o instanceof CrawlDatum) {
             if (sp.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) {
               g_out = ensureSequenceFile(slice, CrawlDatum.GENERATE_DIR_NAME);
               g_out.append(key, o);
             } else if (sp.partName.equals(CrawlDatum.FETCH_DIR_NAME)) {
-              f_out = ensureMapFile(slice, CrawlDatum.FETCH_DIR_NAME, CrawlDatum.class);
+              f_out = ensureMapFile(slice, CrawlDatum.FETCH_DIR_NAME,
+                  CrawlDatum.class);
               f_out.append(key, o);
             } else if (sp.partName.equals(CrawlDatum.PARSE_DIR_NAME)) {
               p_out = ensureSequenceFile(slice, CrawlDatum.PARSE_DIR_NAME);
               p_out.append(key, o);
             } else {
-              throw new IOException("Cannot determine segment part: " + sp.partName);
+              throw new IOException("Cannot determine segment part: "
+                  + sp.partName);
             }
           } else if (o instanceof Content) {
             c_out = ensureMapFile(slice, Content.DIR_NAME, Content.class);
@@ -234,9 +249,11 @@
           } else if (o instanceof ParseData) {
             // update the segment name inside contentMeta - required by Indexer
             if (slice == null) {
-              ((ParseData)o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY, segmentName);
+              ((ParseData) o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY,
+                  segmentName);
             } else {
-              ((ParseData)o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY, segmentName + "-" + slice);
+              ((ParseData) o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY,
+                  segmentName + "-" + slice);
             }
             pd_out = ensureMapFile(slice, ParseData.DIR_NAME, ParseData.class);
             pd_out.append(key, o);
@@ -245,20 +262,26 @@
             pt_out.append(key, o);
           }
         }
-        
+
         // lazily create SequenceFile-s.
-        private SequenceFile.Writer ensureSequenceFile(String slice, String dirName) throws IOException {
-          if (slice == null) slice = DEFAULT_SLICE;
-          SequenceFile.Writer res = (SequenceFile.Writer)sliceWriters.get(slice + dirName);
-          if (res != null) return res;
+        private SequenceFile.Writer ensureSequenceFile(String slice,
+            String dirName) throws IOException {
+          if (slice == null)
+            slice = DEFAULT_SLICE;
+          SequenceFile.Writer res = (SequenceFile.Writer) sliceWriters
+              .get(slice + dirName);
+          if (res != null)
+            return res;
           Path wname;
           Path out = FileOutputFormat.getOutputPath(job);
           if (slice == DEFAULT_SLICE) {
-            wname = new Path(new Path(new Path(out, segmentName), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName), dirName),
+                name);
           } else {
-            wname = new Path(new Path(new Path(out, segmentName + "-" + slice), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName + "-" + slice),
+                dirName), name);
           }
-          res = SequenceFile.createWriter(fs, job, wname, Text.class, 
+          res = SequenceFile.createWriter(fs, job, wname, Text.class,
               CrawlDatum.class,
               SequenceFileOutputFormat.getOutputCompressionType(job), progress);
           sliceWriters.put(slice + dirName, res);
@@ -266,23 +289,30 @@
         }
 
         // lazily create MapFile-s.
-        private MapFile.Writer ensureMapFile(String slice, String dirName, Class<? extends Writable> clazz) throws IOException {
-          if (slice == null) slice = DEFAULT_SLICE;
-          MapFile.Writer res = (MapFile.Writer)sliceWriters.get(slice + dirName);
-          if (res != null) return res;
+        private MapFile.Writer ensureMapFile(String slice, String dirName,
+            Class<? extends Writable> clazz) throws IOException {
+          if (slice == null)
+            slice = DEFAULT_SLICE;
+          MapFile.Writer res = (MapFile.Writer) sliceWriters.get(slice
+              + dirName);
+          if (res != null)
+            return res;
           Path wname;
           Path out = FileOutputFormat.getOutputPath(job);
           if (slice == DEFAULT_SLICE) {
-            wname = new Path(new Path(new Path(out, segmentName), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName), dirName),
+                name);
           } else {
-            wname = new Path(new Path(new Path(out, segmentName + "-" + slice), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName + "-" + slice),
+                dirName), name);
           }
-          CompressionType compType = 
-              SequenceFileOutputFormat.getOutputCompressionType(job);
+          CompressionType compType = SequenceFileOutputFormat
+              .getOutputCompressionType(job);
           if (clazz.isAssignableFrom(ParseText.class)) {
             compType = CompressionType.RECORD;
           }
-          res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz, compType, progress);
+          res = new MapFile.Writer(job, fs, wname.toString(), Text.class,
+              clazz, compType, progress);
           sliceWriters.put(slice + dirName, res);
           return res;
         }
@@ -292,9 +322,9 @@
           while (it.hasNext()) {
             Object o = it.next();
             if (o instanceof SequenceFile.Writer) {
-              ((SequenceFile.Writer)o).close();
+              ((SequenceFile.Writer) o).close();
             } else {
-              ((MapFile.Writer)o).close();
+              ((MapFile.Writer) o).close();
             }
           }
         }
@@ -305,14 +335,15 @@
   public SegmentMerger() {
     super(null);
   }
-  
+
   public SegmentMerger(Configuration conf) {
     super(conf);
   }
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     if (conf.getBoolean("segment.merger.filter", false)) {
       filters = new URLFilters(conf);
       mergeFilters = new SegmentMergeFilters(conf);
@@ -334,15 +365,18 @@
       sliceSize = sliceSize / conf.getNumReduceTasks();
     }
   }
-  
+
   private Text newKey = new Text();
-  
+
   public void map(Text key, MetaWrapper value,
-      OutputCollector<Text, MetaWrapper> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, MetaWrapper> output, Reporter reporter)
+      throws IOException {
     String url = key.toString();
     if (normalizers != null) {
       try {
-        url = normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); // normalize the url
+        url = normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); // normalize
+                                                                        // the
+                                                                        // url
       } catch (Exception e) {
         LOG.warn("Skipping " + url + ":" + e.getMessage());
         url = null;
@@ -356,7 +390,7 @@
         url = null;
       }
     }
-    if(url != null) {
+    if (url != null) {
       newKey.set(url);
       output.collect(newKey, value);
     }
@@ -364,12 +398,13 @@
 
   /**
    * NOTE: in selecting the latest version we rely exclusively on the segment
-   * name (not all segment data contain time information). Therefore it is extremely
-   * important that segments be named in an increasing lexicographic order as
-   * their creation time increases.
+   * name (not all segment data contain time information). Therefore it is
+   * extremely important that segments be named in an increasing lexicographic
+   * order as their creation time increases.
    */
   public void reduce(Text key, Iterator<MetaWrapper> values,
-      OutputCollector<Text, MetaWrapper> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, MetaWrapper> output, Reporter reporter)
+      throws IOException {
     CrawlDatum lastG = null;
     CrawlDatum lastF = null;
     CrawlDatum lastSig = null;
@@ -382,18 +417,17 @@
     String lastCname = null;
     String lastPDname = null;
     String lastPTname = null;
-    TreeMap<String, ArrayList<CrawlDatum>> linked =
-      new TreeMap<String, ArrayList<CrawlDatum>>();
+    TreeMap<String, ArrayList<CrawlDatum>> linked = new TreeMap<String, ArrayList<CrawlDatum>>();
     while (values.hasNext()) {
       MetaWrapper wrapper = values.next();
       Object o = wrapper.get();
       String spString = wrapper.getMeta(SEGMENT_PART_KEY);
       if (spString == null) {
-        throw new IOException("Null segment part, key=" + key);        
+        throw new IOException("Null segment part, key=" + key);
       }
       SegmentPart sp = SegmentPart.parse(spString);
       if (o instanceof CrawlDatum) {
-        CrawlDatum val = (CrawlDatum)o;
+        CrawlDatum val = (CrawlDatum) o;
         // check which output dir it belongs to
         if (sp.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) {
           if (lastG == null) {
@@ -443,43 +477,43 @@
         }
       } else if (o instanceof Content) {
         if (lastC == null) {
-          lastC = (Content)o;
+          lastC = (Content) o;
           lastCname = sp.segmentName;
         } else {
           if (lastCname.compareTo(sp.segmentName) < 0) {
-            lastC = (Content)o;
+            lastC = (Content) o;
             lastCname = sp.segmentName;
           }
         }
       } else if (o instanceof ParseData) {
         if (lastPD == null) {
-          lastPD = (ParseData)o;
+          lastPD = (ParseData) o;
           lastPDname = sp.segmentName;
         } else {
           if (lastPDname.compareTo(sp.segmentName) < 0) {
-            lastPD = (ParseData)o;
+            lastPD = (ParseData) o;
             lastPDname = sp.segmentName;
           }
         }
       } else if (o instanceof ParseText) {
         if (lastPT == null) {
-          lastPT = (ParseText)o;
+          lastPT = (ParseText) o;
           lastPTname = sp.segmentName;
         } else {
           if (lastPTname.compareTo(sp.segmentName) < 0) {
-            lastPT = (ParseText)o;
+            lastPT = (ParseText) o;
             lastPTname = sp.segmentName;
           }
         }
       }
     }
-	// perform filtering based on full merge record
-    if (mergeFilters != null && 
-    	 !mergeFilters.filter(key, lastG, lastF, lastSig, lastC, lastPD, lastPT, 
-    			 			   linked.isEmpty() ? null : linked.lastEntry().getValue())){
+    // perform filtering based on full merge record
+    if (mergeFilters != null
+        && !mergeFilters.filter(key, lastG, lastF, lastSig, lastC, lastPD,
+            lastPT, linked.isEmpty() ? null : linked.lastEntry().getValue())) {
       return;
     }
-    	
+
     curCount++;
     String sliceName = null;
     MetaWrapper wrapper = new MetaWrapper();
@@ -545,10 +579,12 @@
     }
   }
 
-  public void merge(Path out, Path[] segs, boolean filter, boolean normalize, long slice) throws Exception {
+  public void merge(Path out, Path[] segs, boolean filter, boolean normalize,
+      long slice) throws Exception {
     String segmentName = Generator.generateSegmentName();
     if (LOG.isInfoEnabled()) {
-      LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName);
+      LOG.info("Merging " + segs.length + " segments to " + out + "/"
+          + segmentName);
     }
     JobConf job = new NutchJob(getConf());
     job.setJobName("mergesegs " + out + "/" + segmentName);
@@ -589,17 +625,24 @@
       pt = pt && fs.exists(ptDir);
     }
     StringBuffer sb = new StringBuffer();
-    if (c) sb.append(" " + Content.DIR_NAME);
-    if (g) sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
-    if (f) sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
-    if (p) sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
-    if (pd) sb.append(" " + ParseData.DIR_NAME);
-    if (pt) sb.append(" " + ParseText.DIR_NAME);
+    if (c)
+      sb.append(" " + Content.DIR_NAME);
+    if (g)
+      sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
+    if (f)
+      sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
+    if (p)
+      sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
+    if (pd)
+      sb.append(" " + ParseData.DIR_NAME);
+    if (pt)
+      sb.append(" " + ParseText.DIR_NAME);
     if (LOG.isInfoEnabled()) {
       LOG.info("SegmentMerger: using segment data from:" + sb.toString());
     }
     for (int i = 0; i < segs.length; i++) {
-      if (segs[i] == null) continue;
+      if (segs[i] == null)
+        continue;
       if (g) {
         Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
         FileInputFormat.addInputPath(job, gDir);
@@ -632,9 +675,9 @@
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(MetaWrapper.class);
     job.setOutputFormat(SegmentOutputFormat.class);
-    
+
     setConf(job);
-    
+
     JobClient.runJob(job);
   }
 
@@ -643,12 +686,17 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-slice NNNN]");
-      System.err.println("\toutput_dir\tname of the parent dir for output segment slice(s)");
-      System.err.println("\t-dir segments\tparent dir containing several segments");
+      System.err
+          .println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-slice NNNN]");
+      System.err
+          .println("\toutput_dir\tname of the parent dir for output segment slice(s)");
+      System.err
+          .println("\t-dir segments\tparent dir containing several segments");
       System.err.println("\tseg1 seg2 ...\tlist of segment dirs");
-      System.err.println("\t-filter\t\tfilter out URL-s prohibited by current URLFilters");
-      System.err.println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
+      System.err
+          .println("\t-filter\t\tfilter out URL-s prohibited by current URLFilters");
+      System.err
+          .println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
       return;
     }
     Configuration conf = NutchConfiguration.create();
@@ -680,7 +728,8 @@
       return;
     }
     SegmentMerger merger = new SegmentMerger(conf);
-    merger.merge(out, segs.toArray(new Path[segs.size()]), filter, normalize, sliceSize);
+    merger.merge(out, segs.toArray(new Path[segs.size()]), filter, normalize,
+        sliceSize);
   }
 
 }
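// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original patch: merging two segments
// with the SegmentMerger.merge(out, segs, filter, normalize, slice) signature
// shown in the hunk above. The segment paths are hypothetical; slice = -1
// disables slicing, matching the default sliceSize in setConf.
// ---------------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.nutch.segment.SegmentMerger;
import org.apache.nutch.util.NutchConfiguration;

public class MergeTwoSegments {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    Path out = new Path("crawl/merged_segments");
    Path[] segs = { new Path("crawl/segments/20111024101500"),
        new Path("crawl/segments/20111025101500") };
    SegmentMerger merger = new SegmentMerger(conf);
    merger.merge(out, segs, false /* filter */, false /* normalize */, -1);
  }
}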
Index: src/java/org/apache/nutch/segment/SegmentReader.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentReader.java	(working copy)
@@ -77,7 +77,7 @@
   public static final Logger LOG = LoggerFactory.getLogger(SegmentReader.class);
 
   long recNo = 0L;
-  
+
   private boolean co, fe, ge, pa, pd, pt;
   private FileSystem fs;
 
@@ -86,32 +86,37 @@
     private Text newKey = new Text();
 
     public void map(WritableComparable key, Writable value,
-        OutputCollector<Text, NutchWritable> collector, Reporter reporter) throws IOException {
+        OutputCollector<Text, NutchWritable> collector, Reporter reporter)
+        throws IOException {
       // convert on the fly from old formats with UTF8 keys
       if (key instanceof UTF8) {
         newKey.set(key.toString());
         key = newKey;
       }
-      collector.collect((Text)key, new NutchWritable(value));
+      collector.collect((Text) key, new NutchWritable(value));
     }
-    
+
   }
 
   /** Implements a text output format */
   public static class TextOutputFormat extends
       FileOutputFormat<WritableComparable, Writable> {
     public RecordWriter<WritableComparable, Writable> getRecordWriter(
-        final FileSystem fs, JobConf job,
-        String name, final Progressable progress) throws IOException {
+        final FileSystem fs, JobConf job, String name,
+        final Progressable progress) throws IOException {
 
-      final Path segmentDumpFile = new Path(FileOutputFormat.getOutputPath(job), name);
+      final Path segmentDumpFile = new Path(
+          FileOutputFormat.getOutputPath(job), name);
 
       // Get the old copy out of the way
-      if (fs.exists(segmentDumpFile)) fs.delete(segmentDumpFile, true);
+      if (fs.exists(segmentDumpFile))
+        fs.delete(segmentDumpFile, true);
 
-      final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
+      final PrintStream printStream = new PrintStream(
+          fs.create(segmentDumpFile));
       return new RecordWriter<WritableComparable, Writable>() {
-        public synchronized void write(WritableComparable key, Writable value) throws IOException {
+        public synchronized void write(WritableComparable key, Writable value)
+            throws IOException {
           printStream.println(value);
         }
 
@@ -125,9 +130,9 @@
   public SegmentReader() {
     super(null);
   }
-  
-  public SegmentReader(Configuration conf, boolean co, boolean fe, boolean ge, boolean pa,
-          boolean pd, boolean pt) {
+
+  public SegmentReader(Configuration conf, boolean co, boolean fe, boolean ge,
+      boolean pa, boolean pd, boolean pt) {
     super(conf);
     this.co = co;
     this.fe = fe;
@@ -167,12 +172,12 @@
     job.setBoolean("segment.reader.pt", this.pt);
     return job;
   }
-  
-  public void close() {}
 
+  public void close() {
+  }
+
   public void reduce(Text key, Iterator<NutchWritable> values,
-      OutputCollector<Text, Text> output, Reporter reporter)
-          throws IOException {
+      OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
     StringBuffer dump = new StringBuffer();
 
     dump.append("\nRecno:: ").append(recNo++).append("\n");
@@ -195,7 +200,7 @@
   }
 
   public void dump(Path segment, Path output) throws IOException {
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("SegmentReader: dump segment: " + segment);
     }
@@ -203,20 +208,30 @@
     JobConf job = createJobConf();
     job.setJobName("read " + segment);
 
-    if (ge) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
-    if (fe) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
-    if (pa) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
-    if (co) FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
-    if (pd) FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
-    if (pt) FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
+    if (ge)
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.GENERATE_DIR_NAME));
+    if (fe)
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.FETCH_DIR_NAME));
+    if (pa)
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.PARSE_DIR_NAME));
+    if (co)
+      FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
+    if (pd)
+      FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
+    if (pt)
+      FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
 
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setMapperClass(InputCompatMapper.class);
     job.setReducerClass(SegmentReader.class);
 
-    Path tempDir = new Path(job.get("hadoop.tmp.dir", "/tmp") + "/segread-" + new java.util.Random().nextInt());
+    Path tempDir = new Path(job.get("hadoop.tmp.dir", "/tmp") + "/segread-"
+        + new java.util.Random().nextInt());
     fs.delete(tempDir, true);
-    
+
     FileOutputFormat.setOutputPath(job, tempDir);
     job.setOutputFormat(TextOutputFormat.class);
     job.setOutputKeyClass(Text.class);
@@ -229,22 +244,25 @@
 
     // remove the old file
     fs.delete(dumpFile, true);
-    FileStatus[] fstats = fs.listStatus(tempDir, HadoopFSUtil.getPassAllFilter());
+    FileStatus[] fstats = fs.listStatus(tempDir,
+        HadoopFSUtil.getPassAllFilter());
     Path[] files = HadoopFSUtil.getPaths(fstats);
 
     PrintWriter writer = null;
     int currentRecordNumber = 0;
     if (files.length > 0) {
-      writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(fs.create(dumpFile))));
+      writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
+          fs.create(dumpFile))));
       try {
         for (int i = 0; i < files.length; i++) {
           Path partFile = (Path) files[i];
           try {
-            currentRecordNumber = append(fs, job, partFile, writer, currentRecordNumber);
+            currentRecordNumber = append(fs, job, partFile, writer,
+                currentRecordNumber);
           } catch (IOException exception) {
             if (LOG.isWarnEnabled()) {
-              LOG.warn("Couldn't copy the content of " + partFile.toString() +
-                       " into " + dumpFile.toString());
+              LOG.warn("Couldn't copy the content of " + partFile.toString()
+                  + " into " + dumpFile.toString());
               LOG.warn(exception.getMessage());
             }
           }
@@ -254,13 +272,16 @@
       }
     }
     fs.delete(tempDir);
-    if (LOG.isInfoEnabled()) { LOG.info("SegmentReader: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("SegmentReader: done");
+    }
   }
 
   /** Appends two files and updates the Recno counter */
-  private int append(FileSystem fs, Configuration conf, Path src, PrintWriter writer, int currentRecordNumber)
-          throws IOException {
-    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(src)));
+  private int append(FileSystem fs, Configuration conf, Path src,
+      PrintWriter writer, int currentRecordNumber) throws IOException {
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        fs.open(src)));
     try {
       String line = reader.readLine();
       while (line != null) {
@@ -277,89 +298,101 @@
   }
 
   private static final String[][] keys = new String[][] {
-          {"co", "Content::\n"},
-          {"ge", "Crawl Generate::\n"},
-          {"fe", "Crawl Fetch::\n"},
-          {"pa", "Crawl Parse::\n"},
-          {"pd", "ParseData::\n"},
-          {"pt", "ParseText::\n"}
-  };
+      { "co", "Content::\n" }, { "ge", "Crawl Generate::\n" },
+      { "fe", "Crawl Fetch::\n" }, { "pa", "Crawl Parse::\n" },
+      { "pd", "ParseData::\n" }, { "pt", "ParseText::\n" } };
 
   public void get(final Path segment, final Text key, Writer writer,
-          final Map<String, List<Writable>> results) throws Exception {
+      final Map<String, List<Writable>> results) throws Exception {
     LOG.info("SegmentReader: get '" + key + "'");
     ArrayList<Thread> threads = new ArrayList<Thread>();
-    if (co) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, Content.DIR_NAME), key);
-          results.put("co", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+    if (co)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                Content.DIR_NAME), key);
+            results.put("co", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (fe) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, CrawlDatum.FETCH_DIR_NAME), key);
-          results.put("fe", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (fe)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                CrawlDatum.FETCH_DIR_NAME), key);
+            results.put("fe", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (ge) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getSeqRecords(new Path(segment, CrawlDatum.GENERATE_DIR_NAME), key);
-          results.put("ge", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (ge)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getSeqRecords(new Path(segment,
+                CrawlDatum.GENERATE_DIR_NAME), key);
+            results.put("ge", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (pa) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getSeqRecords(new Path(segment, CrawlDatum.PARSE_DIR_NAME), key);
-          results.put("pa", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (pa)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getSeqRecords(new Path(segment,
+                CrawlDatum.PARSE_DIR_NAME), key);
+            results.put("pa", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (pd) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, ParseData.DIR_NAME), key);
-          results.put("pd", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (pd)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                ParseData.DIR_NAME), key);
+            results.put("pd", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (pt) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, ParseText.DIR_NAME), key);
-          results.put("pt", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (pt)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                ParseText.DIR_NAME), key);
+            results.put("pt", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
+      });
     Iterator<Thread> it = threads.iterator();
-    while (it.hasNext()) it.next().start();
+    while (it.hasNext())
+      it.next().start();
     int cnt;
     do {
       cnt = 0;
       try {
         Thread.sleep(5000);
-      } catch (Exception e) {};
+      } catch (Exception e) {
+      }
+      ;
       it = threads.iterator();
       while (it.hasNext()) {
-        if (it.next().isAlive()) cnt++;
+        if (it.next().isAlive())
+          cnt++;
       }
       if ((cnt > 0) && (LOG.isDebugEnabled())) {
         LOG.debug("(" + cnt + " to retrieve)");
@@ -376,15 +409,16 @@
       writer.flush();
     }
   }
-  
+
   private List<Writable> getMapRecords(Path dir, Text key) throws Exception {
-    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, dir, getConf());
+    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, dir,
+        getConf());
     ArrayList<Writable> res = new ArrayList<Writable>();
     Class keyClass = readers[0].getKeyClass();
     Class valueClass = readers[0].getValueClass();
     if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
       throw new IOException("Incompatible key (" + keyClass.getName() + ")");
-    Writable value = (Writable)valueClass.newInstance();
+    Writable value = (Writable) valueClass.newInstance();
     // we don't know the partitioning schema
     for (int i = 0; i < readers.length; i++) {
       if (readers[i].get(key, value) != null)
@@ -395,14 +429,15 @@
   }
 
   private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), dir);
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
+        getConf(), dir);
     ArrayList<Writable> res = new ArrayList<Writable>();
     Class keyClass = readers[0].getKeyClass();
     Class valueClass = readers[0].getValueClass();
     if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
       throw new IOException("Incompatible key (" + keyClass.getName() + ")");
-    Writable aKey = (Writable)keyClass.newInstance();
-    Writable value = (Writable)valueClass.newInstance();
+    Writable aKey = (Writable) keyClass.newInstance();
+    Writable value = (Writable) valueClass.newInstance();
     for (int i = 0; i < readers.length; i++) {
       while (readers[i].next(aKey, value)) {
         if (aKey.equals(key))
@@ -422,41 +457,55 @@
     public long parsed = -1L;
     public long parseErrors = -1L;
   }
-  
+
   SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
-  
+
   public void list(List<Path> dirs, Writer writer) throws Exception {
-    writer.write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
+    writer
+        .write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
     for (int i = 0; i < dirs.size(); i++) {
       Path dir = dirs.get(i);
       SegmentReaderStats stats = new SegmentReaderStats();
       getStats(dir, stats);
       writer.write(dir.getName() + "\t");
-      if (stats.generated == -1) writer.write("?");
-      else writer.write(stats.generated + "");
+      if (stats.generated == -1)
+        writer.write("?");
+      else
+        writer.write(stats.generated + "");
       writer.write("\t\t");
-      if (stats.start == -1) writer.write("?\t");
-      else writer.write(sdf.format(new Date(stats.start)));
+      if (stats.start == -1)
+        writer.write("?\t");
+      else
+        writer.write(sdf.format(new Date(stats.start)));
       writer.write("\t");
-      if (stats.end == -1) writer.write("?");
-      else writer.write(sdf.format(new Date(stats.end)));
+      if (stats.end == -1)
+        writer.write("?");
+      else
+        writer.write(sdf.format(new Date(stats.end)));
       writer.write("\t");
-      if (stats.fetched == -1) writer.write("?");
-      else writer.write(stats.fetched + "");
+      if (stats.fetched == -1)
+        writer.write("?");
+      else
+        writer.write(stats.fetched + "");
       writer.write("\t");
-      if (stats.parsed == -1) writer.write("?");
-      else writer.write(stats.parsed + "");
+      if (stats.parsed == -1)
+        writer.write("?");
+      else
+        writer.write(stats.parsed + "");
       writer.write("\n");
       writer.flush();
     }
   }
-  
-  public void getStats(Path segment, final SegmentReaderStats stats) throws Exception {
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+
+  public void getStats(Path segment, final SegmentReaderStats stats)
+      throws Exception {
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
+        getConf(), new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
     long cnt = 0L;
     Text key = new Text();
     for (int i = 0; i < readers.length; i++) {
-      while (readers[i].next(key)) cnt++;
+      while (readers[i].next(key))
+        cnt++;
       readers[i].close();
     }
     stats.generated = cnt;
@@ -466,12 +515,15 @@
       long start = Long.MAX_VALUE;
       long end = Long.MIN_VALUE;
       CrawlDatum value = new CrawlDatum();
-      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, fetchDir, getConf());
+      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, fetchDir,
+          getConf());
       for (int i = 0; i < mreaders.length; i++) {
         while (mreaders[i].next(key, value)) {
           cnt++;
-          if (value.getFetchTime() < start) start = value.getFetchTime();
-          if (value.getFetchTime() > end) end = value.getFetchTime();
+          if (value.getFetchTime() < start)
+            start = value.getFetchTime();
+          if (value.getFetchTime() > end)
+            end = value.getFetchTime();
         }
         mreaders[i].close();
       }
@@ -484,11 +536,13 @@
       cnt = 0L;
       long errors = 0L;
       ParseData value = new ParseData();
-      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, parseDir, getConf());
+      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, parseDir,
+          getConf());
       for (int i = 0; i < mreaders.length; i++) {
         while (mreaders[i].next(key, value)) {
           cnt++;
-          if (!value.getStatus().isSuccess()) errors++;
+          if (!value.getStatus().isSuccess())
+            errors++;
         }
         mreaders[i].close();
       }
@@ -496,7 +550,7 @@
       stats.parseErrors = errors;
     }
   }
-  
+
   private static final int MODE_DUMP = 0;
 
   private static final int MODE_LIST = 1;
@@ -513,7 +567,8 @@
       mode = MODE_DUMP;
     else if (args[0].equals("-list"))
       mode = MODE_LIST;
-    else if (args[0].equals("-get")) mode = MODE_GET;
+    else if (args[0].equals("-get"))
+      mode = MODE_GET;
 
     boolean co = true;
     boolean fe = true;
@@ -545,63 +600,69 @@
     }
     Configuration conf = NutchConfiguration.create();
     final FileSystem fs = FileSystem.get(conf);
-    SegmentReader segmentReader = new SegmentReader(conf, co, fe, ge, pa, pd, pt);
+    SegmentReader segmentReader = new SegmentReader(conf, co, fe, ge, pa, pd,
+        pt);
     // collect required args
     switch (mode) {
-      case MODE_DUMP:
-        String input = args[1];
-        if (input == null) {
-          System.err.println("Missing required argument: <segment_dir>");
-          usage();
-          return;
-        }
-        String output = args.length > 2 ? args[2] : null;
-        if (output == null) {
-          System.err.println("Missing required argument: <output>");
-          usage();
-          return;
-        }
-        segmentReader.dump(new Path(input), new Path(output));
+    case MODE_DUMP:
+      String input = args[1];
+      if (input == null) {
+        System.err.println("Missing required argument: <segment_dir>");
+        usage();
         return;
-      case MODE_LIST:
-        ArrayList<Path> dirs = new ArrayList<Path>();
-        for (int i = 1; i < args.length; i++) {
-          if (args[i] == null) continue;
-          if (args[i].equals("-dir")) {
-            Path dir = new Path(args[++i]);
-            FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
-            Path[] files = HadoopFSUtil.getPaths(fstats);
-            if (files != null && files.length > 0) {
-              dirs.addAll(Arrays.asList(files));
-            }
-          } else dirs.add(new Path(args[i]));
-        }
-        segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
+      }
+      String output = args.length > 2 ? args[2] : null;
+      if (output == null) {
+        System.err.println("Missing required argument: <output>");
+        usage();
         return;
-      case MODE_GET:
-        input = args[1];
-        if (input == null) {
-          System.err.println("Missing required argument: <segment_dir>");
-          usage();
-          return;
-        }
-        String key = args.length > 2 ? args[2] : null;
-        if (key == null) {
-          System.err.println("Missing required argument: <keyValue>");
-          usage();
-          return;
-        }
-        segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter(System.out, "UTF-8"), new HashMap<String, List<Writable>>());
+      }
+      segmentReader.dump(new Path(input), new Path(output));
+      return;
+    case MODE_LIST:
+      ArrayList<Path> dirs = new ArrayList<Path>();
+      for (int i = 1; i < args.length; i++) {
+        if (args[i] == null)
+          continue;
+        if (args[i].equals("-dir")) {
+          Path dir = new Path(args[++i]);
+          FileStatus[] fstats = fs.listStatus(dir,
+              HadoopFSUtil.getPassDirectoriesFilter(fs));
+          Path[] files = HadoopFSUtil.getPaths(fstats);
+          if (files != null && files.length > 0) {
+            dirs.addAll(Arrays.asList(files));
+          }
+        } else
+          dirs.add(new Path(args[i]));
+      }
+      segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
+      return;
+    case MODE_GET:
+      input = args[1];
+      if (input == null) {
+        System.err.println("Missing required argument: <segment_dir>");
+        usage();
         return;
-      default:
-        System.err.println("Invalid operation: " + args[0]);
+      }
+      String key = args.length > 2 ? args[2] : null;
+      if (key == null) {
+        System.err.println("Missing required argument: <keyValue>");
         usage();
         return;
+      }
+      segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter(
+          System.out, "UTF-8"), new HashMap<String, List<Writable>>());
+      return;
+    default:
+      System.err.println("Invalid operation: " + args[0]);
+      usage();
+      return;
     }
   }
 
   private static void usage() {
-    System.err.println("Usage: SegmentReader (-dump ... | -list ... | -get ...) [general options]\n");
+    System.err
+        .println("Usage: SegmentReader (-dump ... | -list ... | -get ...) [general options]\n");
     System.err.println("* General options:");
     System.err.println("\t-nocontent\tignore content directory");
     System.err.println("\t-nofetch\tignore crawl_fetch directory");
@@ -610,21 +671,32 @@
     System.err.println("\t-noparsedata\tignore parse_data directory");
     System.err.println("\t-noparsetext\tignore parse_text directory");
     System.err.println();
-    System.err.println("* SegmentReader -dump <segment_dir> <output> [general options]");
-    System.err.println("  Dumps content of a <segment_dir> as a text file to <output>.\n");
+    System.err
+        .println("* SegmentReader -dump <segment_dir> <output> [general options]");
+    System.err
+        .println("  Dumps content of a <segment_dir> as a text file to <output>.\n");
     System.err.println("\t<segment_dir>\tname of the segment directory.");
-    System.err.println("\t<output>\tname of the (non-existent) output directory.");
+    System.err
+        .println("\t<output>\tname of the (non-existent) output directory.");
     System.err.println();
-    System.err.println("* SegmentReader -list (<segment_dir1> ... | -dir <segments>) [general options]");
-    System.err.println("  List a synopsis of segments in specified directories, or all segments in");
-    System.err.println("  a directory <segments>, and print it on System.out\n");
-    System.err.println("\t<segment_dir1> ...\tlist of segment directories to process");
-    System.err.println("\t-dir <segments>\t\tdirectory that contains multiple segments");
+    System.err
+        .println("* SegmentReader -list (<segment_dir1> ... | -dir <segments>) [general options]");
+    System.err
+        .println("  List a synopsis of segments in specified directories, or all segments in");
+    System.err
+        .println("  a directory <segments>, and print it on System.out\n");
+    System.err
+        .println("\t<segment_dir1> ...\tlist of segment directories to process");
+    System.err
+        .println("\t-dir <segments>\t\tdirectory that contains multiple segments");
     System.err.println();
-    System.err.println("* SegmentReader -get <segment_dir> <keyValue> [general options]");
-    System.err.println("  Get a specified record from a segment, and print it on System.out.\n");
+    System.err
+        .println("* SegmentReader -get <segment_dir> <keyValue> [general options]");
+    System.err
+        .println("  Get a specified record from a segment, and print it on System.out.\n");
     System.err.println("\t<segment_dir>\tname of the segment directory.");
     System.err.println("\t<keyValue>\tvalue of the key (url).");
-    System.err.println("\t\tNote: put double-quotes around strings with spaces.");
+    System.err
+        .println("\t\tNote: put double-quotes around strings with spaces.");
   }
 }
Index: src/java/org/apache/nutch/segment/SegmentPart.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentPart.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentPart.java	(working copy)
@@ -30,16 +30,16 @@
   public String segmentName;
   /** Name of the segment part (ie. one of subdirectories inside a segment). */
   public String partName;
-  
+
   public SegmentPart() {
-    
+
   }
-  
+
   public SegmentPart(String segmentName, String partName) {
     this.segmentName = segmentName;
     this.partName = partName;
   }
-  
+
   /**
    * Return a String representation of this class, in the form
    * "segmentName/partName".
@@ -47,23 +47,27 @@
   public String toString() {
     return segmentName + "/" + partName;
   }
-  
+
   /**
    * Create SegmentPart from a FileSplit.
+   * 
    * @param split
-   * @return A {@link SegmentPart} resultant from a 
-   * {@link FileSplit}.
+   * @return A {@link SegmentPart} resulting from a {@link FileSplit}.
    * @throws Exception
    */
   public static SegmentPart get(FileSplit split) throws IOException {
     return get(split.getPath().toString());
   }
-  
+
   /**
    * Create SegmentPart from a full path of a location inside any segment part.
-   * @param path full path into a segment part (may include "part-xxxxx" components)
+   * 
+   * @param path
+   *          full path into a segment part (may include "part-xxxxx"
+   *          components)
    * @return SegmentPart instance describing this part.
-   * @throws IOException if any required path components are missing.
+   * @throws IOException
+   *           if any required path components are missing.
    */
   public static SegmentPart get(String path) throws IOException {
     // find part name
@@ -87,12 +91,15 @@
     String segment = dir.substring(idx + 1);
     return new SegmentPart(segment, part);
   }
-  
+
   /**
    * Create SegmentPart from a String in format "segmentName/partName".
-   * @param string input String
+   * 
+   * @param string
+   *          input String
    * @return parsed instance of SegmentPart
-   * @throws IOException if "/" is missing.
+   * @throws IOException
+   *           if "/" is missing.
    */
   public static SegmentPart parse(String string) throws IOException {
     int idx = string.indexOf('/');
Index: src/java/org/apache/nutch/segment/SegmentMergeFilters.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentMergeFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentMergeFilters.java	(working copy)
@@ -39,7 +39,8 @@
  * 
  */
 public class SegmentMergeFilters {
-  private static final Logger LOG = LoggerFactory.getLogger(SegmentMergeFilters.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SegmentMergeFilters.class);
   private SegmentMergeFilter[] filters;
 
   public SegmentMergeFilters(Configuration conf) {
@@ -72,9 +73,7 @@
       if (!filter.filter(key, generateData, fetchData, sigData, content,
           parseData, parseText, linked)) {
         if (LOG.isTraceEnabled())
-          LOG
-              .trace("Key " + key + " dropped by "
-                  + filter.getClass().getName());
+          LOG.trace("Key " + key + " dropped by " + filter.getClass().getName());
         return false;
       }
     }
Index: src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java
===================================================================
--- src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java	(working copy)
@@ -31,14 +31,14 @@
 
 /**
  * An input format that takes Nutch Content objects and converts them to text
- * while converting newline endings to spaces.  This format is useful for working
+ * while converting newline endings to spaces. This format is useful for working
  * with Nutch content objects in Hadoop Streaming with other languages.
  */
-public class ContentAsTextInputFormat
-  extends SequenceFileInputFormat<Text, Text> {
+public class ContentAsTextInputFormat extends
+    SequenceFileInputFormat<Text, Text> {
 
-  private static class ContentAsTextRecordReader
-    implements RecordReader<Text, Text> {
+  private static class ContentAsTextRecordReader implements
+      RecordReader<Text, Text> {
 
     private final SequenceFileRecordReader<Text, Content> sequenceFileRecordReader;
 
@@ -46,11 +46,11 @@
     private Content innerValue;
 
     public ContentAsTextRecordReader(Configuration conf, FileSplit split)
-      throws IOException {
+        throws IOException {
       sequenceFileRecordReader = new SequenceFileRecordReader<Text, Content>(
-        conf, split);
-      innerKey = (Text)sequenceFileRecordReader.createKey();
-      innerValue = (Content)sequenceFileRecordReader.createValue();
+          conf, split);
+      innerKey = (Text) sequenceFileRecordReader.createKey();
+      innerValue = (Content) sequenceFileRecordReader.createValue();
     }
 
     public Text createKey() {
@@ -61,9 +61,8 @@
       return new Text();
     }
 
-    public synchronized boolean next(Text key, Text value)
-      throws IOException {
-      
+    public synchronized boolean next(Text key, Text value) throws IOException {
+
       // convert the content object to text
       Text tKey = key;
       Text tValue = value;
@@ -72,26 +71,23 @@
       }
       tKey.set(innerKey.toString());
       String contentAsStr = new String(innerValue.getContent());
-      
+
       // replace new line endings with spaces
       contentAsStr = contentAsStr.replaceAll("\n", " ");
       value.set(contentAsStr);
-     
+
       return true;
     }
 
-    public float getProgress()
-      throws IOException {
+    public float getProgress() throws IOException {
       return sequenceFileRecordReader.getProgress();
     }
 
-    public synchronized long getPos()
-      throws IOException {
+    public synchronized long getPos() throws IOException {
       return sequenceFileRecordReader.getPos();
     }
 
-    public synchronized void close()
-      throws IOException {
+    public synchronized void close() throws IOException {
       sequenceFileRecordReader.close();
     }
   }
@@ -101,10 +97,9 @@
   }
 
   public RecordReader<Text, Text> getRecordReader(InputSplit split,
-    JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
 
     reporter.setStatus(split.toString());
-    return new ContentAsTextRecordReader(job, (FileSplit)split);
+    return new ContentAsTextRecordReader(job, (FileSplit) split);
   }
 }
Index: src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/WebGraph.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/WebGraph.java	(working copy)
@@ -89,9 +89,7 @@
  * complete before the WebGraph is updated and some type of analysis, such as
  * LinkRank, is run to update scores in the Node database in a stable fashion.
  */
-public class WebGraph
-  extends Configured
-  implements Tool {
+public class WebGraph extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(WebGraph.class);
   public static final String LOCK_NAME = ".locked";
@@ -104,10 +102,9 @@
    * by domain and host can be ignored. The number of Outlinks out to a given
    * page or domain can also be limited.
    */
-  public static class OutlinkDb
-    extends Configured
-    implements Mapper<Text, Writable, Text, LinkDatum>,
-    Reducer<Text, LinkDatum, Text, LinkDatum> {
+  public static class OutlinkDb extends Configured implements
+      Mapper<Text, Writable, Text, LinkDatum>,
+      Reducer<Text, LinkDatum, Text, LinkDatum> {
 
     // ignoring internal domains, internal hosts
     private boolean ignoreDomain = true;
@@ -124,7 +121,8 @@
     /**
      * Normalizes and trims extra whitespace from the given url.
      * 
-     * @param url The url to normalize.
+     * @param url
+     *          The url to normalize.
      * 
      * @return The normalized url.
      */
@@ -136,10 +134,9 @@
 
           // normalize and trim the url
           normalized = urlNormalizers.normalize(url,
-            URLNormalizers.SCOPE_DEFAULT);
+              URLNormalizers.SCOPE_DEFAULT);
           normalized = normalized.trim();
-        }
-        catch (Exception e) {
+        } catch (Exception e) {
           LOG.warn("Skipping " + url + ":" + e);
           normalized = null;
         }
@@ -151,7 +148,8 @@
      * Returns the fetch time from the parse data or the current system time if
      * the fetch time doesn't exist.
      * 
-     * @param data The parse data.
+     * @param data
+     *          The parse data.
      * 
      * @return The fetch time as a long.
      */
@@ -164,8 +162,7 @@
 
         // get the fetch time from the parse data
         fetchTime = Long.parseLong(fetchTimeStr);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         fetchTime = System.currentTimeMillis();
       }
       return fetchTime;
@@ -201,8 +198,8 @@
      * maps out new LinkDatum objects from new crawls ParseData.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       // normalize url, stop processing if null
       String url = normalizeUrl(key.toString());
@@ -214,7 +211,7 @@
 
         // get the parse data and the outlinks from the parse data, along with
         // the fetch time for those links
-        ParseData data = (ParseData)value;
+        ParseData data = (ParseData) value;
         long fetchTime = getFetchTime(data);
         Outlink[] outlinkAr = data.getOutlinks();
         Map<String, String> outlinkMap = new LinkedHashMap<String, String>();
@@ -230,7 +227,7 @@
             // url is existing
             boolean existingUrl = outlinkMap.containsKey(toUrl);
             if (toUrl != null
-              && (!existingUrl || (existingUrl && outlinkMap.get(toUrl) == null))) {
+                && (!existingUrl || (existingUrl && outlinkMap.get(toUrl) == null))) {
               outlinkMap.put(toUrl, outlink.getAnchor());
             }
           }
@@ -242,17 +239,16 @@
           LinkDatum datum = new LinkDatum(outlinkUrl, anchor, fetchTime);
           output.collect(key, datum);
         }
-      }
-      else if (value instanceof LinkDatum) {
+      } else if (value instanceof LinkDatum) {
 
         // collect existing outlinks from existing OutlinkDb
-        output.collect(key, (LinkDatum)value);
+        output.collect(key, (LinkDatum) value);
       }
     }
 
     public void reduce(Text key, Iterator<LinkDatum> values,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       // aggregate all outlinks, get the most recent timestamp for a fetch
       // which should be the timestamp for all of the most recent outlinks
@@ -266,7 +262,7 @@
         if (mostRecent == 0L || mostRecent < timestamp) {
           mostRecent = timestamp;
         }
-        outlinkList.add((LinkDatum)WritableUtils.clone(next, conf));
+        outlinkList.add((LinkDatum) WritableUtils.clone(next, conf));
       }
 
       // get the url, domain, and host for the url
@@ -291,10 +287,11 @@
         // outlinks must be the most recent and conform to internal url and
         // limiting rules, if it does collect it
         if (datum.getTimestamp() == mostRecent
-          && (!limitPages || (limitPages && !pages.contains(toPage)))
-          && (!limitDomains || (limitDomains && !domains.contains(toDomain)))
-          && (!ignoreHost || (ignoreHost && !toHost.equalsIgnoreCase(host)))
-          && (!ignoreDomain || (ignoreDomain && !toDomain.equalsIgnoreCase(domain)))) {
+            && (!limitPages || (limitPages && !pages.contains(toPage)))
+            && (!limitDomains || (limitDomains && !domains.contains(toDomain)))
+            && (!ignoreHost || (ignoreHost && !toHost.equalsIgnoreCase(host)))
+            && (!ignoreDomain || (ignoreDomain && !toDomain
+                .equalsIgnoreCase(domain)))) {
           output.collect(key, datum);
           pages.add(toPage);
           domains.add(toDomain);
@@ -311,9 +308,8 @@
    * OutlinkDb LinkDatum objects and are regenerated each time the WebGraph is
    * updated.
    */
-  private static class InlinkDb
-    extends Configured
-    implements Mapper<Text, LinkDatum, Text, LinkDatum> {
+  private static class InlinkDb extends Configured implements
+      Mapper<Text, LinkDatum, Text, LinkDatum> {
 
     private JobConf conf;
     private long timestamp;
@@ -348,8 +344,8 @@
      * new system timestamp, type and to and from url switched.
      */
     public void map(Text key, LinkDatum datum,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       // get the to and from url and the anchor
       String fromUrl = key.toString();
@@ -367,9 +363,8 @@
    * Creates the Node database which consists of the number of in and outlinks
    * for each url and a score slot for analysis programs such as LinkRank.
    */
-  private static class NodeDb
-    extends Configured
-    implements Reducer<Text, LinkDatum, Text, Node> {
+  private static class NodeDb extends Configured implements
+      Reducer<Text, LinkDatum, Text, Node> {
 
     private JobConf conf;
 
@@ -401,8 +396,8 @@
      * score of 0.0 for each url (node) in the webgraph.
      */
     public void reduce(Text key, Iterator<LinkDatum> values,
-      OutputCollector<Text, Node> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Node> output, Reporter reporter)
+        throws IOException {
 
       Node node = new Node();
       int numInlinks = 0;
@@ -413,8 +408,7 @@
         LinkDatum next = values.next();
         if (next.getLinkType() == LinkDatum.INLINK) {
           numInlinks++;
-        }
-        else if (next.getLinkType() == LinkDatum.OUTLINK) {
+        } else if (next.getLinkType() == LinkDatum.OUTLINK) {
           numOutlinks++;
         }
       }
@@ -432,14 +426,17 @@
    * Node. If a current WebGraph exists then it is updated, if it doesn't exist
    * then a new WebGraph database is created.
    * 
-   * @param webGraphDb The WebGraph to create or update.
-   * @param segments The array of segments used to update the WebGraph. Newer
-   * segments and fetch times will overwrite older segments.
+   * @param webGraphDb
+   *          The WebGraph to create or update.
+   * @param segments
+   *          The array of segments used to update the WebGraph. Newer segments
+   *          and fetch times will overwrite older segments.
    * 
-   * @throws IOException If an error occurs while processing the WebGraph.
+   * @throws IOException
+   *           If an error occurs while processing the WebGraph.
    */
   public void createWebGraph(Path webGraphDb, Path[] segments)
-    throws IOException {
+      throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -467,7 +464,7 @@
     }
 
     Path tempOutlinkDb = new Path(outlinkDb + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
     JobConf outlinkJob = new NutchJob(conf);
     outlinkJob.setJobName("Outlinkdb: " + outlinkDb);
 
@@ -503,9 +500,8 @@
       LOG.info("OutlinkDb: installing " + outlinkDb);
       FSUtils.replace(fs, outlinkDb, tempOutlinkDb, true);
       LOG.info("OutlinkDb: finished");
-    }
-    catch (IOException e) {
-      
+    } catch (IOException e) {
+
       // remove lock file and and temporary directory if an error occurs
       LockUtil.removeLockFile(fs, lock);
       if (fs.exists(tempOutlinkDb)) {
@@ -518,7 +514,7 @@
     // inlink and temp link database paths
     Path inlinkDb = new Path(webGraphDb, INLINK_DIR);
     Path tempInlinkDb = new Path(inlinkDb + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf inlinkJob = new NutchJob(conf);
     inlinkJob.setJobName("Inlinkdb " + inlinkDb);
@@ -534,21 +530,20 @@
     inlinkJob.setOutputFormat(MapFileOutputFormat.class);
 
     try {
-      
+
       // run the inlink and replace any old with new
       LOG.info("InlinkDb: running");
       JobClient.runJob(inlinkJob);
       LOG.info("InlinkDb: installing " + inlinkDb);
       FSUtils.replace(fs, inlinkDb, tempInlinkDb, true);
       LOG.info("InlinkDb: finished");
-    }
-    catch (IOException e) {
-      
+    } catch (IOException e) {
+
       // remove lock file and and temporary directory if an error occurs
       LockUtil.removeLockFile(fs, lock);
       if (fs.exists(tempInlinkDb)) {
         fs.delete(tempInlinkDb, true);
-      }      
+      }
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -556,7 +551,7 @@
     // node and temp node database paths
     Path nodeDb = new Path(webGraphDb, NODE_DIR);
     Path tempNodeDb = new Path(nodeDb + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf nodeJob = new NutchJob(conf);
     nodeJob.setJobName("NodeDb " + nodeDb);
@@ -574,21 +569,20 @@
     nodeJob.setOutputFormat(MapFileOutputFormat.class);
 
     try {
-      
+
       // run the node job and replace old nodedb with new
       LOG.info("NodeDb: running");
       JobClient.runJob(nodeJob);
       LOG.info("NodeDb: installing " + nodeDb);
       FSUtils.replace(fs, nodeDb, tempNodeDb, true);
       LOG.info("NodeDb: finished");
-    }
-    catch (IOException e) {
-      
+    } catch (IOException e) {
+
       // remove lock file and and temporary directory if an error occurs
       LockUtil.removeLockFile(fs, lock);
       if (fs.exists(tempNodeDb)) {
         fs.delete(tempNodeDb, true);
-      }      
+      }
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -597,11 +591,11 @@
     LockUtil.removeLockFile(fs, lock);
 
     long end = System.currentTimeMillis();
-    LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new WebGraph(), args);
     System.exit(res);
   }
@@ -609,18 +603,17 @@
   /**
    * Parses command link arguments and runs the WebGraph jobs.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph database to use").create("webgraphdb");
-    Option segOpts = OptionBuilder.withArgName("segment").hasArgs().withDescription(
-      "the segment(s) to use").create("segment");
-    Option segDirOpts = OptionBuilder.withArgName("segmentDir").hasArgs().withDescription(
-      "the segment directory to use").create("segmentDir");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph database to use").create("webgraphdb");
+    Option segOpts = OptionBuilder.withArgName("segment").hasArgs()
+        .withDescription("the segment(s) to use").create("segment");
+    Option segDirOpts = OptionBuilder.withArgName("segmentDir").hasArgs()
+        .withDescription("the segment directory to use").create("segmentDir");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     options.addOption(segOpts);
@@ -631,8 +624,7 @@
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || (!line.hasOption("segment") && !line.hasOption("segmentDir"))
-) {
+          || (!line.hasOption("segment") && !line.hasOption("segmentDir"))) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraph", options);
         return -1;
@@ -655,14 +647,14 @@
       if (line.hasOption("segmentDir")) {
         Path dir = new Path(line.getOptionValue("segmentDir"));
         FileSystem fs = dir.getFileSystem(getConf());
-        FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] fstats = fs.listStatus(dir,
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         segPaths = HadoopFSUtil.getPaths(fstats);
       }
 
       createWebGraph(new Path(webGraphDb), segPaths);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("WebGraph: " + StringUtils.stringifyException(e));
       return -2;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java	(working copy)
@@ -58,13 +58,12 @@
 
 /**
  * Updates the score from the WebGraph node database into the crawl database.
- * Any score that is not in the node database is set to the clear score in the 
+ * Any score that is not in the node database is set to the clear score in the
  * crawl database.
  */
-public class ScoreUpdater
-  extends Configured
-  implements Tool, Mapper<Text, Writable, Text, ObjectWritable>,
-  Reducer<Text, ObjectWritable, Text, CrawlDatum> {
+public class ScoreUpdater extends Configured implements Tool,
+    Mapper<Text, Writable, Text, ObjectWritable>,
+    Reducer<Text, ObjectWritable, Text, CrawlDatum> {
 
   public static final Logger LOG = LoggerFactory.getLogger(ScoreUpdater.class);
 
@@ -80,8 +79,8 @@
    * Changes input into ObjectWritables.
    */
   public void map(Text key, Writable value,
-    OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+      throws IOException {
 
     ObjectWritable objWrite = new ObjectWritable();
     objWrite.set(value);
@@ -93,8 +92,8 @@
    * with a cleared score.
    */
   public void reduce(Text key, Iterator<ObjectWritable> values,
-    OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     String url = key.toString();
     Node node = null;
@@ -106,34 +105,31 @@
       ObjectWritable next = values.next();
       Object value = next.get();
       if (value instanceof Node) {
-        node = (Node)value;
+        node = (Node) value;
+      } else if (value instanceof CrawlDatum) {
+        datum = (CrawlDatum) value;
       }
-      else if (value instanceof CrawlDatum) {
-        datum = (CrawlDatum)value;
-      }
     }
 
-    // datum should never be null, could happen if somehow the url was 
+    // datum should never be null, could happen if somehow the url was
     // normalized or changed after being pulled from the crawldb
     if (datum != null) {
 
       if (node != null) {
-        
+
         // set the inlink score in the nodedb
         float inlinkScore = node.getInlinkScore();
         datum.setScore(inlinkScore);
         LOG.debug(url + ": setting to score " + inlinkScore);
-      }
-      else {
-        
+      } else {
+
         // clear out the score in the crawldb
         datum.setScore(clearScore);
         LOG.debug(url + ": setting to clear score of " + clearScore);
       }
 
       output.collect(key, datum);
-    }
-    else {
+    } else {
       LOG.debug(url + ": no datum");
     }
   }
@@ -142,16 +138,18 @@
   }
 
   /**
-   * Updates the inlink score in the web graph node databsae into the crawl 
+   * Updates the inlink score in the web graph node database into the crawl
    * database.
    * 
-   * @param crawlDb The crawl database to update
-   * @param webGraphDb The webgraph database to use.
+   * @param crawlDb
+   *          The crawl database to update
+   * @param webGraphDb
+   *          The webgraph database to use.
    * 
-   * @throws IOException If an error occurs while updating the scores.
+   * @throws IOException
+   *           If an error occurs while updating the scores.
    */
-  public void update(Path crawlDb, Path webGraphDb)
-    throws IOException {
+  public void update(Path crawlDb, Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -164,8 +162,8 @@
     LOG.info("Running crawldb update " + crawlDb);
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Path crawlDbCurrent = new Path(crawlDb, CrawlDb.CURRENT_NAME);
-    Path newCrawlDb = new Path(crawlDb,
-      Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random()
+        .nextInt(Integer.MAX_VALUE)));
 
     // run the updater job outputting to the temp crawl database
     JobConf updater = new NutchJob(conf);
@@ -184,10 +182,9 @@
 
     try {
       JobClient.runJob(updater);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
-      
+
       // remove the temp crawldb on error
       if (fs.exists(newCrawlDb)) {
         fs.delete(newCrawlDb, true);
@@ -200,29 +197,28 @@
     CrawlDb.install(updater, crawlDb);
 
     long end = System.currentTimeMillis();
-    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new ScoreUpdater(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
    * Runs the ScoreUpdater tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option crawlDbOpts = OptionBuilder.withArgName("crawldb").hasArg().withDescription(
-      "the crawldb to use").create("crawldb");
-    Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the webgraphdb to use").create("webgraphdb");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option crawlDbOpts = OptionBuilder.withArgName("crawldb").hasArg()
+        .withDescription("the crawldb to use").create("crawldb");
+    Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the webgraphdb to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(crawlDbOpts);
     options.addOption(webGraphOpts);
@@ -232,7 +228,7 @@
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("crawldb")) {
+          || !line.hasOption("crawldb")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("ScoreUpdater", options);
         return -1;
@@ -242,8 +238,7 @@
       String webGraphDb = line.getOptionValue("webgraphdb");
       update(new Path(crawlDb), new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ScoreUpdater: " + StringUtils.stringifyException(e));
       return -1;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java	(working copy)
@@ -61,16 +61,12 @@
  * have been run. For link analysis score a program such as LinkRank will need
  * to have been run which updates the NodeDb of the WebGraph.
  */
-public class NodeDumper
-  extends Configured
-  implements Tool {
+public class NodeDumper extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(NodeDumper.class);
 
   private static enum DumpType {
-    INLINKS,
-    OUTLINKS,
-    SCORES
+    INLINKS, OUTLINKS, SCORES
   }
 
   /**
@@ -78,10 +74,9 @@
    * on the command line, the top urls could be for number of inlinks, for
    * number of outlinks, or for link analysis score.
    */
-  public static class Sorter
-    extends Configured
-    implements Mapper<Text, Node, FloatWritable, Text>,
-    Reducer<FloatWritable, Text, Text, FloatWritable> {
+  public static class Sorter extends Configured implements
+      Mapper<Text, Node, FloatWritable, Text>,
+      Reducer<FloatWritable, Text, Text, FloatWritable> {
 
     private JobConf conf;
     private boolean inlinks = false;
@@ -109,17 +104,15 @@
      * score.
      */
     public void map(Text key, Node node,
-      OutputCollector<FloatWritable, Text> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
 
       float number = 0;
       if (inlinks) {
         number = node.getNumInlinks();
-      }
-      else if (outlinks) {
+      } else if (outlinks) {
         number = node.getNumOutlinks();
-      }
-      else {
+      } else {
         number = node.getInlinkScore();
       }
 
@@ -131,8 +124,8 @@
      * Flips and collects the url and numeric sort value.
      */
     public void reduce(FloatWritable key, Iterator<Text> values,
-      OutputCollector<Text, FloatWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, FloatWritable> output, Reporter reporter)
+        throws IOException {
 
       // take the negative of the negative to get original value, sometimes 0
       // value are a little weird
@@ -142,7 +135,7 @@
 
       // collect all values, this time with the url as key
       while (values.hasNext() && (numCollected < topn)) {
-        Text url = (Text)WritableUtils.clone(values.next(), conf);
+        Text url = (Text) WritableUtils.clone(values.next(), conf);
         output.collect(url, number);
         numCollected++;
       }
@@ -152,15 +145,17 @@
   /**
    * Runs the process to dump the top urls out to a text file.
    * 
-   * @param webGraphDb The WebGraph from which to pull values.
+   * @param webGraphDb
+   *          The WebGraph from which to pull values.
    * 
    * @param topN
    * @param output
    * 
-   * @throws IOException If an error occurs while dumping the top values.
+   * @throws IOException
+   *           If an error occurs while dumping the top values.
    */
-  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff)
-    throws IOException {
+  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output,
+      boolean asEff) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -194,45 +189,44 @@
     try {
       LOG.info("NodeDumper: running");
       JobClient.runJob(dumper);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
     long end = System.currentTimeMillis();
-    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new NodeDumper(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
    * Runs the node dumper tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph database to use").create("webgraphdb");
-    Option inlinkOpts = OptionBuilder.withArgName("inlinks").withDescription(
-      "show highest inlinks").create("inlinks");
-    Option outlinkOpts = OptionBuilder.withArgName("outlinks").withDescription(
-      "show highest outlinks").create("outlinks");
-    Option scoreOpts = OptionBuilder.withArgName("scores").withDescription(
-      "show highest scores").create("scores");
-    Option topNOpts = OptionBuilder.withArgName("topn").hasOptionalArg().withDescription(
-      "show topN scores").create("topn");
-    Option outputOpts = OptionBuilder.withArgName("output").hasArg().withDescription(
-      "the output directory to use").create("output");
-    Option effOpts = OptionBuilder.withArgName("asEff").withDescription(
-      "Solr ExternalFileField compatible output format").create("asEff");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph database to use").create("webgraphdb");
+    Option inlinkOpts = OptionBuilder.withArgName("inlinks")
+        .withDescription("show highest inlinks").create("inlinks");
+    Option outlinkOpts = OptionBuilder.withArgName("outlinks")
+        .withDescription("show highest outlinks").create("outlinks");
+    Option scoreOpts = OptionBuilder.withArgName("scores")
+        .withDescription("show highest scores").create("scores");
+    Option topNOpts = OptionBuilder.withArgName("topn").hasOptionalArg()
+        .withDescription("show topN scores").create("topn");
+    Option outputOpts = OptionBuilder.withArgName("output").hasArg()
+        .withDescription("the output directory to use").create("output");
+    Option effOpts = OptionBuilder.withArgName("asEff")
+        .withDescription("Solr ExternalFileField compatible output format")
+        .create("asEff");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     options.addOption(inlinkOpts);
@@ -256,21 +250,20 @@
       boolean inlinks = line.hasOption("inlinks");
       boolean outlinks = line.hasOption("outlinks");
       boolean scores = line.hasOption("scores");
-      long topN = (line.hasOption("topn")
-        ? Long.parseLong(line.getOptionValue("topn")) : Long.MAX_VALUE);
+      long topN = (line.hasOption("topn") ? Long.parseLong(line
+          .getOptionValue("topn")) : Long.MAX_VALUE);
 
       // get the correct dump type
       String output = line.getOptionValue("output");
-      DumpType type = (inlinks ? DumpType.INLINKS : outlinks
-        ? DumpType.OUTLINKS : DumpType.SCORES);
+      DumpType type = (inlinks ? DumpType.INLINKS
+          : outlinks ? DumpType.OUTLINKS : DumpType.SCORES);
 
       // Use ExternalFileField?
       boolean asEff = line.hasOption("asEff");
 
       dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("NodeDumper: " + StringUtils.stringifyException(e));
       return -2;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/Node.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/Node.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/Node.java	(working copy)
@@ -25,12 +25,11 @@
 
 /**
  * A class which holds the number of inlinks and outlinks for a given url along
- * with an inlink score from a link analysis program and any metadata.  
+ * with an inlink score from a link analysis program and any metadata.
  * 
  * The Node is the core unit of the NodeDb in the WebGraph.
  */
-public class Node
-  implements Writable {
+public class Node implements Writable {
 
   private int numInlinks = 0;
   private int numOutlinks = 0;
@@ -77,8 +76,7 @@
     this.metadata = metadata;
   }
 
-  public void readFields(DataInput in)
-    throws IOException {
+  public void readFields(DataInput in) throws IOException {
 
     numInlinks = in.readInt();
     numOutlinks = in.readInt();
@@ -87,8 +85,7 @@
     metadata.readFields(in);
   }
 
-  public void write(DataOutput out)
-    throws IOException {
+  public void write(DataOutput out) throws IOException {
 
     out.writeInt(numInlinks);
     out.writeInt(numOutlinks);
@@ -98,8 +95,8 @@
 
   public String toString() {
     return "num inlinks: " + numInlinks + ", num outlinks: " + numOutlinks
-      + ", inlink score: " + inlinkScore + ", outlink score: "
-      + getOutlinkScore() + ", metadata: " + metadata.toString();
+        + ", inlink score: " + inlinkScore + ", outlink score: "
+        + getOutlinkScore() + ", metadata: " + metadata.toString();
   }
 
 }
Index: src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java	(working copy)
@@ -27,8 +27,7 @@
  * A class for holding link information including the url, anchor text, a score,
  * the timestamp of the link and a link type.
  */
-public class LinkDatum
-  implements Writable {
+public class LinkDatum implements Writable {
 
   public final static byte INLINK = 1;
   public final static byte OUTLINK = 2;
@@ -49,7 +48,8 @@
   /**
    * Creates a LinkDatum with a given url. Timestamp is set to current time.
    * 
-   * @param url The link url.
+   * @param url
+   *          The link url.
    */
   public LinkDatum(String url) {
     this(url, "", System.currentTimeMillis());
@@ -59,8 +59,10 @@
    * Creates a LinkDatum with a url and an anchor text. Timestamp is set to
    * current time.
    * 
-   * @param url The link url.
-   * @param anchor The link anchor text.
+   * @param url
+   *          The link url.
+   * @param anchor
+   *          The link anchor text.
    */
   public LinkDatum(String url, String anchor) {
     this(url, anchor, System.currentTimeMillis());
@@ -112,8 +114,7 @@
     this.linkType = linkType;
   }
 
-  public void readFields(DataInput in)
-    throws IOException {
+  public void readFields(DataInput in) throws IOException {
     url = Text.readString(in);
     anchor = Text.readString(in);
     score = in.readFloat();
@@ -121,8 +122,7 @@
     linkType = in.readByte();
   }
 
-  public void write(DataOutput out)
-    throws IOException {
+  public void write(DataOutput out) throws IOException {
     Text.writeString(out, url);
     Text.writeString(out, anchor != null ? anchor : "");
     out.writeFloat(score);
@@ -132,9 +132,9 @@
 
   public String toString() {
 
-    String type = (linkType == INLINK ? "inlink" : (linkType == OUTLINK)
-      ? "outlink" : "unknown");
+    String type = (linkType == INLINK ? "inlink"
+        : (linkType == OUTLINK) ? "outlink" : "unknown");
     return "url: " + url + ", anchor: " + anchor + ", score: " + score
-      + ", timestamp: " + timestamp + ", link type: " + type;
+        + ", timestamp: " + timestamp + ", link type: " + type;
   }
 }
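
As a quick illustration of the value type reformatted above, a small sketch that uses only the constructors, getUrl() and toString() visible in this hunk; the url and anchor text are placeholders.

import org.apache.nutch.scoring.webgraph.LinkDatum;

public class LinkDatumExample {
  public static void main(String[] args) {
    // The two-argument constructor stamps the link with the current time.
    LinkDatum outlink = new LinkDatum("http://www.example.com/page",
        "Example anchor");
    System.out.println(outlink.getUrl());
    // toString() (reformatted above) reports url, anchor, score, timestamp
    // and the link type.
    System.out.println(outlink);
  }
}
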
Index: src/java/org/apache/nutch/scoring/webgraph/NodeReader.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/NodeReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/NodeReader.java	(working copy)
@@ -37,7 +37,7 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * Reads and prints to system out information for a single node from the NodeDb 
+ * Reads and prints to system out information for a single node from the NodeDb
  * in the WebGraph.
  */
 public class NodeReader extends Configured {
@@ -46,33 +46,35 @@
   private MapFile.Reader[] nodeReaders;
 
   public NodeReader() {
-    
+
   }
-  
+
   public NodeReader(Configuration conf) {
     super(conf);
   }
-  
+
   /**
    * Prints the content of the Node represented by the url to system out.
    * 
-   * @param webGraphDb The webgraph from which to get the node.
-   * @param url The url of the node.
+   * @param webGraphDb
+   *          The webgraph from which to get the node.
+   * @param url
+   *          The url of the node.
    * 
-   * @throws IOException If an error occurs while getting the node.
+   * @throws IOException
+   *           If an error occurs while getting the node.
    */
-  public void dumpUrl(Path webGraphDb, String url)
-    throws IOException {
+  public void dumpUrl(Path webGraphDb, String url) throws IOException {
 
     fs = FileSystem.get(getConf());
     nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
-      WebGraph.NODE_DIR), getConf());
+        WebGraph.NODE_DIR), getConf());
 
     // open the readers, get the node, print out the info, and close the readers
     Text key = new Text(url);
     Node node = new Node();
     MapFileOutputFormat.getEntry(nodeReaders,
-      new HashPartitioner<Text, Node>(), key, node);
+        new HashPartitioner<Text, Node>(), key, node);
     System.out.println(url + ":");
     System.out.println("  inlink score: " + node.getInlinkScore());
     System.out.println("  outlink score: " + node.getOutlinkScore());
@@ -82,20 +84,19 @@
   }
 
   /**
-   * Runs the NodeReader tool.  The command line arguments must contain a 
-   * webgraphdb path and a url.  The url must match the normalized url that is
+   * Runs the NodeReader tool. The command line arguments must contain a
+   * webgraphdb path and a url. The url must match the normalized url that is
    * contained in the NodeDb of the WebGraph.
    */
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
     Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
-      .withDescription("the webgraphdb to use").create("webgraphdb");
+        .withDescription("the webgraphdb to use").create("webgraphdb");
     Option urlOpts = OptionBuilder.withArgName("url").hasOptionalArg()
-      .withDescription("the url to dump").create("url");
+        .withDescription("the url to dump").create("url");
     options.addOption(helpOpts);
     options.addOption(webGraphOpts);
     options.addOption(urlOpts);
@@ -106,7 +107,7 @@
       // command line must take a webgraphdb and a url
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("url")) {
+          || !line.hasOption("url")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraphReader", options);
         return;
@@ -117,10 +118,9 @@
       String url = line.getOptionValue("url");
       NodeReader reader = new NodeReader(NutchConfiguration.create());
       reader.dumpUrl(new Path(webGraphDb), url);
-      
+
       return;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       e.printStackTrace();
       return;
     }
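
The NodeReader main() above boils down to two calls; a standalone sketch follows. The webgraphdb path and url are placeholders, and the url must match the normalized url stored in the NodeDb.

import org.apache.hadoop.fs.Path;
import org.apache.nutch.scoring.webgraph.NodeReader;
import org.apache.nutch.util.NutchConfiguration;

public class NodeReaderExample {
  public static void main(String[] args) throws Exception {
    // Same calls NodeReader.main() makes after option parsing: prints the
    // inlink/outlink scores and counts for one node in the WebGraph NodeDb.
    NodeReader reader = new NodeReader(NutchConfiguration.create());
    reader.dumpUrl(new Path("crawl/webgraphdb"), "http://www.example.com/");
  }
}
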
Index: src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LinkRank.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LinkRank.java	(working copy)
@@ -68,9 +68,7 @@
 import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
-public class LinkRank
-  extends Configured
-  implements Tool {
+public class LinkRank extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkRank.class);
   private static final String NUM_NODES = "_num_nodes_";
@@ -79,14 +77,16 @@
    * Runs the counter job. The counter job determines the number of links in the
    * webgraph. This is used during analysis.
    * 
-   * @param fs The job file system.
-   * @param webGraphDb The web graph database to use.
+   * @param fs
+   *          The job file system.
+   * @param webGraphDb
+   *          The web graph database to use.
    * 
    * @return The number of nodes in the web graph.
-   * @throws IOException If an error occurs while running the counter job.
+   * @throws IOException
+   *           If an error occurs while running the counter job.
    */
-  private int runCounter(FileSystem fs, Path webGraphDb)
-    throws IOException {
+  private int runCounter(FileSystem fs, Path webGraphDb) throws IOException {
 
     // configure the counter job
     Path numLinksPath = new Path(webGraphDb, NUM_NODES);
@@ -110,8 +110,7 @@
     LOG.info("Starting link counter job");
     try {
       JobClient.runJob(counter);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -124,13 +123,13 @@
     BufferedReader buffer = new BufferedReader(new InputStreamReader(readLinks));
     String numLinksLine = buffer.readLine();
     readLinks.close();
-    
+
     // check if there are links to process, if none, webgraph might be empty
     if (numLinksLine == null || numLinksLine.length() == 0) {
       fs.delete(numLinksPath, true);
       throw new IOException("No links to process, is the webgraph empty?");
     }
-    
+
     // delete temp file and convert and return the number of links as an int
     LOG.info("Deleting numlinks temp file");
     fs.delete(numLinksPath, true);
@@ -142,13 +141,15 @@
    * Runs the initializer job. The initializer job sets up the nodes with a
    * default starting score for link analysis.
    * 
-   * @param nodeDb The node database to use.
-   * @param output The job output directory.
+   * @param nodeDb
+   *          The node database to use.
+   * @param output
+   *          The job output directory.
    * 
-   * @throws IOException If an error occurs while running the initializer job.
+   * @throws IOException
+   *           If an error occurs while running the initializer job.
    */
-  private void runInitializer(Path nodeDb, Path output)
-    throws IOException {
+  private void runInitializer(Path nodeDb, Path output) throws IOException {
 
     // configure the initializer
     JobConf initializer = new NutchJob(getConf());
@@ -167,8 +168,7 @@
     LOG.info("Starting initialization job");
     try {
       JobClient.runJob(initializer);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -184,15 +184,20 @@
    * space requirements but it can be very useful in weeding out and eliminating
    * link farms and other spam pages.
    * 
-   * @param nodeDb The node database to use.
-   * @param outlinkDb The outlink database to use.
-   * @param loopDb The loop database to use if it exists.
-   * @param output The output directory.
+   * @param nodeDb
+   *          The node database to use.
+   * @param outlinkDb
+   *          The outlink database to use.
+   * @param loopDb
+   *          The loop database to use if it exists.
+   * @param output
+   *          The output directory.
    * 
-   * @throws IOException If an error occurs while running the inverter job.
+   * @throws IOException
+   *           If an error occurs while running the inverter job.
    */
   private void runInverter(Path nodeDb, Path outlinkDb, Path loopDb, Path output)
-    throws IOException {
+      throws IOException {
 
     // configure the inverter
     JobConf inverter = new NutchJob(getConf());
@@ -218,8 +223,7 @@
     LOG.info("Starting inverter job");
     try {
       JobClient.runJob(inverter);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -233,23 +237,28 @@
    * Typically the link analysis job is run a number of times to allow the link
    * rank scores to converge.
    * 
-   * @param nodeDb The node database from which we are getting previous link
-   * rank scores.
-   * @param inverted The inverted inlinks
-   * @param output The link analysis output.
-   * @param iteration The current iteration number.
-   * @param numIterations The total number of link analysis iterations
+   * @param nodeDb
+   *          The node database from which we are getting previous link rank
+   *          scores.
+   * @param inverted
+   *          The inverted inlinks
+   * @param output
+   *          The link analysis output.
+   * @param iteration
+   *          The current iteration number.
+   * @param numIterations
+   *          The total number of link analysis iterations
    * 
-   * @throws IOException If an error occurs during link analysis.
+   * @throws IOException
+   *           If an error occurs during link analysis.
    */
   private void runAnalysis(Path nodeDb, Path inverted, Path output,
-    int iteration, int numIterations, float rankOne)
-    throws IOException {
+      int iteration, int numIterations, float rankOne) throws IOException {
 
     JobConf analyzer = new NutchJob(getConf());
     analyzer.set("link.analyze.iteration", String.valueOf(iteration + 1));
     analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
-      + " of " + numIterations);
+        + " of " + numIterations);
     FileInputFormat.addInputPath(analyzer, nodeDb);
     FileInputFormat.addInputPath(analyzer, inverted);
     FileOutputFormat.setOutputPath(analyzer, output);
@@ -266,8 +275,7 @@
     LOG.info("Starting analysis job");
     try {
       JobClient.runJob(analyzer);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -279,9 +287,9 @@
    * This is used to determine a rank one score for pages with zero inlinks but
    * that contain outlinks.
    */
-  private static class Counter
-    implements Mapper<Text, Node, Text, LongWritable>,
-    Reducer<Text, LongWritable, Text, LongWritable> {
+  private static class Counter implements
+      Mapper<Text, Node, Text, LongWritable>,
+      Reducer<Text, LongWritable, Text, LongWritable> {
 
     private JobConf conf;
     private static Text numNodes = new Text(NUM_NODES);
@@ -295,8 +303,8 @@
      * Outputs one for every node.
      */
     public void map(Text key, Node value,
-      OutputCollector<Text, LongWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
       output.collect(numNodes, one);
     }
 
@@ -304,8 +312,8 @@
      * Totals the node number and outputs a single total value.
      */
     public void reduce(Text key, Iterator<LongWritable> values,
-      OutputCollector<Text, LongWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
 
       long total = 0;
       while (values.hasNext()) {
@@ -318,8 +326,7 @@
     }
   }
 
-  private static class Initializer
-    implements Mapper<Text, Node, Text, Node> {
+  private static class Initializer implements Mapper<Text, Node, Text, Node> {
 
     private JobConf conf;
     private float initialScore = 1.0f;
@@ -330,11 +337,10 @@
     }
 
     public void map(Text key, Node node, OutputCollector<Text, Node> output,
-      Reporter reporter)
-      throws IOException {
+        Reporter reporter) throws IOException {
 
       String url = key.toString();
-      Node outNode = (Node)WritableUtils.clone(node, conf);
+      Node outNode = (Node) WritableUtils.clone(node, conf);
       outNode.setInlinkScore(initialScore);
 
       output.collect(new Text(url), outNode);
@@ -349,9 +355,9 @@
    * WebGraph. The link analysis process consists of inverting, analyzing and
    * scoring, in a loop for a given number of iterations.
    */
-  private static class Inverter
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, LinkDatum> {
+  private static class Inverter implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, LinkDatum> {
 
     private JobConf conf;
 
@@ -363,8 +369,8 @@
      * Convert values to ObjectWritable
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -377,8 +383,8 @@
      * within the loopset.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       String fromUrl = key.toString();
       List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
@@ -390,14 +396,12 @@
         ObjectWritable write = values.next();
         Object obj = write.get();
         if (obj instanceof Node) {
-          node = (Node)obj;
+          node = (Node) obj;
+        } else if (obj instanceof LinkDatum) {
+          outlinks.add((LinkDatum) WritableUtils.clone((LinkDatum) obj, conf));
+        } else if (obj instanceof LoopSet) {
+          loops = (LoopSet) obj;
         }
-        else if (obj instanceof LinkDatum) {
-          outlinks.add((LinkDatum)WritableUtils.clone((LinkDatum)obj, conf));
-        }
-        else if (obj instanceof LoopSet) {
-          loops = (LoopSet)obj;
-        }
       }
 
       // get the number of outlinks and the current inlink and outlink scores
@@ -418,7 +422,7 @@
           // remove any url that is contained in the loopset
           if (loopSet != null && loopSet.contains(toUrl)) {
             LOG.debug(fromUrl + ": Skipping inverting inlink from loop "
-              + toUrl);
+                + toUrl);
             continue;
           }
           outlink.setUrl(fromUrl);
@@ -427,8 +431,8 @@
           // collect the inverted outlink
           output.collect(new Text(toUrl), outlink);
           LOG.debug(toUrl + ": inverting inlink from " + fromUrl
-            + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
-            + " inlinkscore: " + outlinkScore);
+              + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
+              + " inlinkscore: " + outlinkScore);
         }
       }
     }
@@ -440,9 +444,9 @@
   /**
    * Runs a single link analysis iteration.
    */
-  private static class Analyzer
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Node> {
+  private static class Analyzer implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Node> {
 
     private JobConf conf;
     private float dampingFactor = 0.85f;
@@ -459,13 +463,13 @@
 
       try {
         this.conf = conf;
-        this.dampingFactor = conf.getFloat("link.analyze.damping.factor", 0.85f);
+        this.dampingFactor = conf
+            .getFloat("link.analyze.damping.factor", 0.85f);
         this.rankOne = conf.getFloat("link.analyze.rank.one", 0.0f);
         this.itNum = conf.getInt("link.analyze.iteration", 0);
         limitPages = conf.getBoolean("link.ignore.limit.page", true);
         limitDomains = conf.getBoolean("link.ignore.limit.domain", true);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         LOG.error(StringUtils.stringifyException(e));
         throw new IllegalArgumentException(e);
       }
@@ -475,8 +479,8 @@
      * Convert values to ObjectWritable
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(WritableUtils.clone(value, conf));
@@ -488,8 +492,8 @@
      * stored in a temporary NodeDb which replaces the NodeDb of the WebGraph.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Node> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Node> output, Reporter reporter)
+        throws IOException {
 
       String url = key.toString();
       Set<String> domains = new HashSet<String>();
@@ -505,11 +509,10 @@
         ObjectWritable next = values.next();
         Object value = next.get();
         if (value instanceof Node) {
-          node = (Node)value;
-        }
-        else if (value instanceof LinkDatum) {
+          node = (Node) value;
+        } else if (value instanceof LinkDatum) {
 
-          LinkDatum linkDatum = (LinkDatum)value;
+          LinkDatum linkDatum = (LinkDatum) value;
           float scoreFromInlink = linkDatum.getScore();
           String inlinkUrl = linkDatum.getUrl();
           String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
@@ -517,9 +520,9 @@
 
           // limit counting duplicate inlinks by pages or domains
           if ((limitPages && pages.contains(inLinkPage))
-            || (limitDomains && domains.contains(inLinkDomain))) {
+              || (limitDomains && domains.contains(inLinkDomain))) {
             LOG.debug(url + ": ignoring " + scoreFromInlink + " from "
-              + inlinkUrl + ", duplicate page or domain");
+                + inlinkUrl + ", duplicate page or domain");
             continue;
           }
 
@@ -529,25 +532,24 @@
           domains.add(inLinkDomain);
           pages.add(inLinkPage);
           LOG.debug(url + ": adding " + scoreFromInlink + " from " + inlinkUrl
-            + ", total: " + totalInlinkScore);
+              + ", total: " + totalInlinkScore);
         }
       }
 
       // calculate linkRank score formula
       float linkRankScore = (1 - this.dampingFactor)
-        + (this.dampingFactor * totalInlinkScore);
+          + (this.dampingFactor * totalInlinkScore);
 
       LOG.info(url + ": score: " + linkRankScore + " num inlinks: "
-        + numInlinks + " iteration: " + itNum);
+          + numInlinks + " iteration: " + itNum);
 
       // store the score in a temporary NodeDb
-      Node outNode = (Node)WritableUtils.clone(node, conf);
+      Node outNode = (Node) WritableUtils.clone(node, conf);
       outNode.setInlinkScore(linkRankScore);
       output.collect(key, outNode);
     }
 
-    public void close()
-      throws IOException {
+    public void close() throws IOException {
     }
   }
 
@@ -574,12 +576,13 @@
    * by default 10. And finally replaces the NodeDb in the WebGraph with the
    * link rank output.
    * 
-   * @param webGraphDb The WebGraph to run link analysis on.
+   * @param webGraphDb
+   *          The WebGraph to run link analysis on.
    * 
-   * @throws IOException If an error occurs during link analysis.
+   * @throws IOException
+   *           If an error occurs during link analysis.
    */
-  public void analyze(Path webGraphDb)
-    throws IOException {
+  public void analyze(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -609,7 +612,7 @@
     // initialize all urls with a default score
     int numLinks = runCounter(fs, webGraphDb);
     runInitializer(wgNodeDb, nodeDb);
-    float rankOneScore = (1f / (float)numLinks);
+    float rankOneScore = (1f / (float) numLinks);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Analysis: Number of links: " + numLinks);
@@ -622,9 +625,10 @@
     for (int i = 0; i < numIterations; i++) {
 
       // the input to inverting is always the previous output from analysis
-      LOG.info("Analysis: Starting iteration " + (i + 1) + " of " + numIterations);
+      LOG.info("Analysis: Starting iteration " + (i + 1) + " of "
+          + numIterations);
       Path tempRank = new Path(linkRank + "-"
-        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+          + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
       fs.mkdirs(tempRank);
       Path tempInverted = new Path(tempRank, "inverted");
       Path tempNodeDb = new Path(tempRank, WebGraph.NODE_DIR);
@@ -632,13 +636,13 @@
       // run invert and analysis
       runInverter(nodeDb, wgOutlinkDb, loopDb, tempInverted);
       runAnalysis(nodeDb, tempInverted, tempNodeDb, i, numIterations,
-        rankOneScore);
+          rankOneScore);
 
       // replace the temporary NodeDb with the output from analysis
       LOG.info("Analysis: Installing new link scores");
       FSUtils.replace(fs, linkRank, tempRank, true);
       LOG.info("Analysis: finished iteration " + (i + 1) + " of "
-        + numIterations);
+          + numIterations);
     }
 
     // replace the NodeDb in the WebGraph with the final output of analysis
@@ -648,11 +652,11 @@
     // remove the temporary link rank folder
     fs.delete(linkRank, true);
     long end = System.currentTimeMillis();
-    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkRank(), args);
     System.exit(res);
   }
@@ -660,14 +664,13 @@
   /**
    * Runs the LinkRank tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webgraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph db to use").create("webgraphdb");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webgraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph db to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(webgraphOpts);
 
@@ -685,8 +688,7 @@
 
       analyze(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("LinkAnalysis: " + StringUtils.stringifyException(e));
       return -2;
     }
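
The score update reformatted in Analyzer.reduce() above is the damped formula linkRankScore = (1 - dampingFactor) + dampingFactor * totalInlinkScore, with dampingFactor defaulting to 0.85 (link.analyze.damping.factor) and rankOneScore = 1 / numLinks computed in analyze() to give pages with zero inlinks a starting score. A toy, self-contained illustration of one node's update; the inlink contributions are made-up numbers.

public class LinkRankFormulaExample {
  public static void main(String[] args) {
    // Toy illustration of the update in Analyzer.reduce():
    //   linkRankScore = (1 - dampingFactor) + dampingFactor * totalInlinkScore
    float dampingFactor = 0.85f;                  // link.analyze.damping.factor default
    float[] inlinkContributions = { 0.4f, 0.1f }; // scores carried by inverted LinkDatums
    float totalInlinkScore = 0f;
    for (float c : inlinkContributions) {
      totalInlinkScore += c;
    }
    float linkRankScore = (1 - dampingFactor) + dampingFactor * totalInlinkScore;
    System.out.println(linkRankScore);            // 0.15 + 0.85 * 0.5 = 0.575
  }
}
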
Index: src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java	(working copy)
@@ -67,27 +67,24 @@
 
 /**
  * The LinkDumper tool creates a database of node to inlink information that can
- * be read using the nested Reader class.  This allows the inlink and scoring 
- * state of a single url to be reviewed quickly to determine why a given url is 
- * ranking a certain way.  This tool is to be used with the LinkRank analysis.
+ * be read using the nested Reader class. This allows the inlink and scoring
+ * state of a single url to be reviewed quickly to determine why a given url is
+ * ranking a certain way. This tool is to be used with the LinkRank analysis.
  */
-public class LinkDumper
-  extends Configured
-  implements Tool {
+public class LinkDumper extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDumper.class);
   public static final String DUMP_DIR = "linkdump";
 
   /**
-   * Reader class which will print out the url and all of its inlinks to system 
-   * out.  Each inlinkwill be displayed with its node information including 
-   * score and number of in and outlinks.
+   * Reader class which will print out the url and all of its inlinks to system
+   * out. Each inlink will be displayed with its node information including
+   * score and number of in and outlinks.
    */
   public static class Reader {
 
-    public static void main(String[] args)
-      throws Exception {
-      
+    public static void main(String[] args) throws Exception {
+
       if (args == null || args.length < 2) {
         System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
         return;
@@ -99,20 +96,20 @@
       Path webGraphDb = new Path(args[0]);
       String url = args[1];
       MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
-        webGraphDb, DUMP_DIR), conf);
+          webGraphDb, DUMP_DIR), conf);
 
       // get the link nodes for the url
       Text key = new Text(url);
       LinkNodes nodes = new LinkNodes();
       MapFileOutputFormat.getEntry(readers,
-        new HashPartitioner<Text, LinkNodes>(), key, nodes);
+          new HashPartitioner<Text, LinkNodes>(), key, nodes);
 
       // print out the link nodes
       LinkNode[] linkNodesAr = nodes.getLinks();
       System.out.println(url + ":");
       for (LinkNode node : linkNodesAr) {
         System.out.println("  " + node.getUrl() + " - "
-          + node.getNode().toString());
+            + node.getNode().toString());
       }
 
       // close the readers
@@ -123,8 +120,7 @@
   /**
    * Bean class which holds url to node information.
    */
-  public static class LinkNode
-    implements Writable {
+  public static class LinkNode implements Writable {
 
     private String url = null;
     private Node node = null;
@@ -154,15 +150,13 @@
       this.node = node;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
       url = in.readUTF();
       node = new Node();
       node.readFields(in);
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       out.writeUTF(url);
       node.write(out);
     }
@@ -172,8 +166,7 @@
   /**
    * Writable class which holds an array of LinkNode objects.
    */
-  public static class LinkNodes
-    implements Writable {
+  public static class LinkNodes implements Writable {
 
     private LinkNode[] links;
 
@@ -193,8 +186,7 @@
       this.links = links;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
       int numLinks = in.readInt();
       if (numLinks > 0) {
         links = new LinkNode[numLinks];
@@ -206,8 +198,7 @@
       }
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       if (links != null && links.length > 0) {
         int numLinks = links.length;
         out.writeInt(numLinks);
@@ -222,9 +213,9 @@
    * Inverts outlinks from the WebGraph to inlinks and attaches node
    * information.
    */
-  public static class Inverter
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, LinkNode> {
+  public static class Inverter implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, LinkNode> {
 
     private JobConf conf;
 
@@ -236,8 +227,8 @@
      * Wraps all values in ObjectWritables.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -245,12 +236,12 @@
     }
 
     /**
-     * Inverts outlinks to inlinks while attaching node information to the 
+     * Inverts outlinks to inlinks while attaching node information to the
      * outlink.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, LinkNode> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkNode> output, Reporter reporter)
+        throws IOException {
 
       String fromUrl = key.toString();
       List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
@@ -262,14 +253,12 @@
         ObjectWritable write = values.next();
         Object obj = write.get();
         if (obj instanceof Node) {
-          node = (Node)obj;
+          node = (Node) obj;
+        } else if (obj instanceof LinkDatum) {
+          outlinks.add((LinkDatum) WritableUtils.clone((LinkDatum) obj, conf));
+        } else if (obj instanceof LoopSet) {
+          loops = (LoopSet) obj;
         }
-        else if (obj instanceof LinkDatum) {
-          outlinks.add((LinkDatum)WritableUtils.clone((LinkDatum)obj, conf));
-        }
-        else if (obj instanceof LoopSet) {
-          loops = (LoopSet)obj;
-        }
       }
 
       // only collect if there are outlinks
@@ -280,13 +269,13 @@
         for (int i = 0; i < outlinks.size(); i++) {
           LinkDatum outlink = outlinks.get(i);
           String toUrl = outlink.getUrl();
-          
+
           // remove any url that is in the loopset, same as LinkRank
           if (loopSet != null && loopSet.contains(toUrl)) {
             continue;
           }
-          
-          // collect the outlink as an inlink with the node 
+
+          // collect the outlink as an inlink with the node
           output.collect(new Text(toUrl), new LinkNode(fromUrl, node));
         }
       }
@@ -297,11 +286,11 @@
   }
 
   /**
-   * Merges LinkNode objects into a single array value per url.  This allows 
-   * all values to be quickly retrieved and printed via the Reader tool.
+   * Merges LinkNode objects into a single array value per url. This allows all
+   * values to be quickly retrieved and printed via the Reader tool.
    */
-  public static class Merger
-    implements Reducer<Text, LinkNode, Text, LinkNodes> {
+  public static class Merger implements
+      Reducer<Text, LinkNode, Text, LinkNodes> {
 
     private JobConf conf;
     private int maxInlinks = 50000;
@@ -314,8 +303,8 @@
      * Aggregate all LinkNode objects for a given url.
      */
     public void reduce(Text key, Iterator<LinkNode> values,
-      OutputCollector<Text, LinkNodes> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkNodes> output, Reporter reporter)
+        throws IOException {
 
       List<LinkNode> nodeList = new ArrayList<LinkNode>();
       int numNodes = 0;
@@ -323,10 +312,9 @@
       while (values.hasNext()) {
         LinkNode cur = values.next();
         if (numNodes < maxInlinks) {
-          nodeList.add((LinkNode)WritableUtils.clone(cur, conf));
+          nodeList.add((LinkNode) WritableUtils.clone(cur, conf));
           numNodes++;
-        }
-        else {
+        } else {
           break;
         }
       }
@@ -342,11 +330,10 @@
   }
 
   /**
-   * Runs the inverter and merger jobs of the LinkDumper tool to create the 
-   * url to inlink node database.
+   * Runs the inverter and merger jobs of the LinkDumper tool to create the url
+   * to inlink node database.
    */
-  public void dumpLinks(Path webGraphDb)
-    throws IOException {
+  public void dumpLinks(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -362,7 +349,7 @@
 
     // run the inverter job
     Path tempInverted = new Path(webGraphDb, "inverted-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
     JobConf inverter = new NutchJob(conf);
     inverter.setJobName("LinkDumper: inverter");
     FileInputFormat.addInputPath(inverter, nodeDb);
@@ -384,8 +371,7 @@
       LOG.info("LinkDumper: running inverter");
       JobClient.runJob(inverter);
       LOG.info("LinkDumper: finished inverter");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -407,36 +393,34 @@
       LOG.info("LinkDumper: running merger");
       JobClient.runJob(merger);
       LOG.info("LinkDumper: finished merger");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
 
     fs.delete(tempInverted, true);
     long end = System.currentTimeMillis();
-    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkDumper(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
-   * Runs the LinkDumper tool.  This simply creates the database, to read the
+   * Runs the LinkDumper tool. This simply creates the database, to read the
    * values the nested Reader tool must be used.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
     Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
-      .withDescription("the web graph database to use").create("webgraphdb");
+        .withDescription("the web graph database to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     CommandLineParser parser = new GnuParser();
@@ -452,8 +436,7 @@
       String webGraphDb = line.getOptionValue("webgraphdb");
       dumpLinks(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("LinkDumper: " + StringUtils.stringifyException(e));
       return -2;
     }
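
A hypothetical end-to-end sketch of the LinkDumper flow described above: run the tool to build the linkdump database under the webgraphdb, then use the nested Reader to print one url's inlink nodes. Paths and url are placeholders.

import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.LinkDumper;
import org.apache.nutch.util.NutchConfiguration;

public class LinkDumperExample {
  public static void main(String[] args) throws Exception {
    // 1) Build the linkdump database (inverter + merger jobs).
    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDumper(),
        new String[] { "-webgraphdb", "crawl/webgraphdb" });
    // 2) Print the stored inlink nodes for a single url.
    if (res == 0) {
      LinkDumper.Reader.main(new String[] { "crawl/webgraphdb",
          "http://www.example.com/" });
    }
  }
}
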
Index: src/java/org/apache/nutch/scoring/webgraph/LoopReader.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LoopReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LoopReader.java	(working copy)
@@ -44,37 +44,39 @@
 
   private FileSystem fs;
   private MapFile.Reader[] loopReaders;
-  
+
   public LoopReader() {
-    
+
   }
-  
+
   public LoopReader(Configuration conf) {
     super(conf);
   }
 
   /**
-   * Prints loopset for a single url.  The loopset information will show any
+   * Prints loopset for a single url. The loopset information will show any
    * outlink url that eventually forms a link cycle.
    * 
-   * @param webGraphDb The WebGraph to check for loops
-   * @param url The url to check.
+   * @param webGraphDb
+   *          The WebGraph to check for loops
+   * @param url
+   *          The url to check.
    * 
-   * @throws IOException If an error occurs while printing loopset information.
+   * @throws IOException
+   *           If an error occurs while printing loopset information.
    */
-  public void dumpUrl(Path webGraphDb, String url)
-    throws IOException {
+  public void dumpUrl(Path webGraphDb, String url) throws IOException {
 
     // open the readers
     fs = FileSystem.get(getConf());
     loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
-      Loops.LOOPS_DIR), getConf());
+        Loops.LOOPS_DIR), getConf());
 
     // get the loopset for a given url, if any
     Text key = new Text(url);
     LoopSet loop = new LoopSet();
     MapFileOutputFormat.getEntry(loopReaders,
-      new HashPartitioner<Text, LoopSet>(), key, loop);
+        new HashPartitioner<Text, LoopSet>(), key, loop);
 
     // print out each loop url in the set
     System.out.println(url + ":");
@@ -87,19 +89,18 @@
   }
 
   /**
-   * Runs the LoopReader tool.  For this tool to work the loops job must have
+   * Runs the LoopReader tool. For this tool to work the loops job must have
    * already been run on the corresponding WebGraph.
    */
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
     Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
-      .withDescription("the webgraphdb to use").create("webgraphdb");
+        .withDescription("the webgraphdb to use").create("webgraphdb");
     Option urlOpts = OptionBuilder.withArgName("url").hasOptionalArg()
-      .withDescription("the url to dump").create("url");
+        .withDescription("the url to dump").create("url");
     options.addOption(helpOpts);
     options.addOption(webGraphOpts);
     options.addOption(urlOpts);
@@ -109,7 +110,7 @@
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("url")) {
+          || !line.hasOption("url")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraphReader", options);
         return;
@@ -120,8 +121,7 @@
       LoopReader reader = new LoopReader(NutchConfiguration.create());
       reader.dumpUrl(new Path(webGraphDb), url);
       return;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       e.printStackTrace();
       return;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/Loops.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/Loops.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/Loops.java	(working copy)
@@ -76,9 +76,7 @@
  * rather small. Because of this the Loops job is optional and if it doesn't
  * exist then it won't be factored into the LinkRank program.
  */
-public class Loops
-  extends Configured
-  implements Tool {
+public class Loops extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(Loops.class);
   public static final String LOOPS_DIR = "loops";
@@ -87,8 +85,7 @@
   /**
    * A link path or route looking to identify a link cycle.
    */
-  public static class Route
-    implements Writable {
+  public static class Route implements Writable {
 
     private String outlinkUrl = null;
     private String lookingFor = null;
@@ -122,16 +119,14 @@
       this.found = found;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
 
       outlinkUrl = Text.readString(in);
       lookingFor = Text.readString(in);
       found = in.readBoolean();
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       Text.writeString(out, outlinkUrl);
       Text.writeString(out, lookingFor);
       out.writeBoolean(found);
@@ -141,8 +136,7 @@
   /**
    * A set of loops.
    */
-  public static class LoopSet
-    implements Writable {
+  public static class LoopSet implements Writable {
 
     private Set<String> loopSet = new HashSet<String>();
 
@@ -158,8 +152,7 @@
       this.loopSet = loopSet;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
 
       int numNodes = in.readInt();
       loopSet = new HashSet<String>();
@@ -169,8 +162,7 @@
       }
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
 
       int numNodes = (loopSet != null ? loopSet.size() : 0);
       out.writeInt(numNodes);
@@ -191,10 +183,9 @@
   /**
    * Initializes the Loop routes.
    */
-  public static class Initializer
-    extends Configured
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Route> {
+  public static class Initializer extends Configured implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Route> {
 
     private JobConf conf;
 
@@ -222,8 +213,8 @@
      * Wraps values in ObjectWritable.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -236,8 +227,8 @@
      * the Looper job.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Route> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Route> output, Reporter reporter)
+        throws IOException {
 
       String url = key.toString();
       Node node = null;
@@ -248,11 +239,10 @@
         ObjectWritable objWrite = values.next();
         Object obj = objWrite.get();
         if (obj instanceof LinkDatum) {
-          outlinkList.add((LinkDatum)obj);
+          outlinkList.add((LinkDatum) obj);
+        } else if (obj instanceof Node) {
+          node = (Node) obj;
         }
-        else if (obj instanceof Node) {
-          node = (Node)obj;
-        }
       }
 
       // has to have inlinks otherwise cycle not possible
@@ -282,10 +272,9 @@
    * Follows a route path looking for the start url of the route. If the start
    * url is found then the route is a cyclical path.
    */
-  public static class Looper
-    extends Configured
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Route> {
+  public static class Looper extends Configured implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Route> {
 
     private JobConf conf;
     private boolean last = false;
@@ -315,15 +304,14 @@
      * Wrap values in ObjectWritable.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       Writable cloned = null;
       if (value instanceof LinkDatum) {
-        cloned = new Text(((LinkDatum)value).getUrl());
-      }
-      else {
+        cloned = new Text(((LinkDatum) value).getUrl());
+      } else {
         cloned = WritableUtils.clone(value, conf);
       }
       objWrite.set(cloned);
@@ -336,8 +324,8 @@
      * passes.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Route> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Route> output, Reporter reporter)
+        throws IOException {
 
       List<Route> routeList = new ArrayList<Route>();
       Set<String> outlinkUrls = new LinkedHashSet<String>();
@@ -348,10 +336,9 @@
         ObjectWritable next = values.next();
         Object value = next.get();
         if (value instanceof Route) {
-          routeList.add((Route)WritableUtils.clone((Route)value, conf));
-        }
-        else if (value instanceof Text) {
-          String outlinkUrl = ((Text)value).toString();
+          routeList.add((Route) WritableUtils.clone((Route) value, conf));
+        } else if (value instanceof Text) {
+          String outlinkUrl = ((Text) value).toString();
           if (!outlinkUrls.contains(outlinkUrl)) {
             outlinkUrls.add(outlinkUrl);
           }
@@ -375,16 +362,14 @@
         routeIt.remove();
         if (route.isFound()) {
           output.collect(key, route);
-        }
-        else {
+        } else {
 
           // if the route start url is found, set route to found and collect
           String lookingFor = route.getLookingFor();
           if (outlinkUrls.contains(lookingFor)) {
             route.setFound(true);
             output.collect(key, route);
-          }
-          else if (!last) {
+          } else if (!last) {
 
             // setup for next pass through the loop
             for (String outlink : outlinkUrls) {
@@ -402,10 +387,8 @@
   /**
    * Finishes the Loops job by aggregating and collecting any found routes.
    */
-  public static class Finalizer
-    extends Configured
-    implements Mapper<Text, Route, Text, Route>,
-    Reducer<Text, Route, Text, LoopSet> {
+  public static class Finalizer extends Configured implements
+      Mapper<Text, Route, Text, Route>, Reducer<Text, Route, Text, LoopSet> {
 
     private JobConf conf;
 
@@ -433,8 +416,7 @@
      * Maps out any found routes; those will be the link cycles.
      */
     public void map(Text key, Route value, OutputCollector<Text, Route> output,
-      Reporter reporter)
-      throws IOException {
+        Reporter reporter) throws IOException {
 
       if (value.isFound()) {
         String lookingFor = value.getLookingFor();
@@ -443,12 +425,12 @@
     }
 
     /**
-     * Aggregates all found routes for a given start url into a loopset and 
+     * Aggregates all found routes for a given start url into a loopset and
      * collects the loopset.
      */
     public void reduce(Text key, Iterator<Route> values,
-      OutputCollector<Text, LoopSet> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LoopSet> output, Reporter reporter)
+        throws IOException {
 
       LoopSet loops = new LoopSet();
       while (values.hasNext()) {
@@ -465,8 +447,7 @@
   /**
    * Runs the various loop jobs.
    */
-  public void findLoops(Path webGraphDb)
-    throws IOException {
+  public void findLoops(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -481,7 +462,7 @@
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Path routes = new Path(webGraphDb, ROUTES_DIR);
     Path tempRoute = new Path(webGraphDb, ROUTES_DIR + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     // run the initializer
     JobConf init = new NutchJob(conf);
@@ -504,8 +485,7 @@
       LOG.info("Loops: installing initializer " + routes);
       FSUtils.replace(fs, routes, tempRoute, true);
       LOG.info("Loops: finished initializer");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -536,8 +516,7 @@
         LOG.info("Loops: installing looper " + routes);
         FSUtils.replace(fs, routes, tempRoute, true);
         LOG.info("Loops: finished looper");
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         LOG.error(StringUtils.stringifyException(e));
         throw e;
       }
@@ -561,17 +540,16 @@
       LOG.info("Loops: starting finalizer");
       JobClient.runJob(finalizer);
       LOG.info("Loops: finished finalizer");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
     long end = System.currentTimeMillis();
-    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Loops(), args);
     System.exit(res);
   }
@@ -579,14 +557,13 @@
   /**
    * Runs the Loops tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph database to use").create("webgraphdb");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph database to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
 
@@ -603,8 +580,7 @@
       String webGraphDb = line.getOptionValue("webgraphdb");
       findLoops(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("Loops: " + StringUtils.stringifyException(e));
       return -2;
     }
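
Similarly, a hypothetical sketch tying Loops to the LoopReader reformatted earlier: populate the optional loops database, then dump the loopset for one url. Paths and url are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.LoopReader;
import org.apache.nutch.scoring.webgraph.Loops;
import org.apache.nutch.util.NutchConfiguration;

public class LoopsExample {
  public static void main(String[] args) throws Exception {
    // Run the initializer/looper/finalizer jobs to find link cycles.
    int res = ToolRunner.run(NutchConfiguration.create(), new Loops(),
        new String[] { "-webgraphdb", "crawl/webgraphdb" });
    // Then print any loopset recorded for one url.
    if (res == 0) {
      LoopReader reader = new LoopReader(NutchConfiguration.create());
      reader.dumpUrl(new Path("crawl/webgraphdb"), "http://www.example.com/");
    }
  }
}
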
Index: src/java/org/apache/nutch/scoring/ScoringFilterException.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilterException.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/ScoringFilterException.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.scoring;
 
 /**
Index: src/java/org/apache/nutch/scoring/ScoringFilter.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/ScoringFilter.java	(working copy)
@@ -33,129 +33,181 @@
 /**
  * A contract defining behavior of scoring plugins.
  * 
- * A scoring filter will manipulate scoring variables in CrawlDatum and
- * in resulting search indexes. Filters can be chained in a specific order,
- * to provide multi-stage scoring adjustments.
+ * A scoring filter will manipulate scoring variables in CrawlDatum and in
+ * resulting search indexes. Filters can be chained in a specific order, to
+ * provide multi-stage scoring adjustments.
  * 
  * @author Andrzej Bialecki
  */
 public interface ScoringFilter extends Configurable, Pluggable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = ScoringFilter.class.getName();
-  
+
   /**
    * Set an initial score for newly injected pages. Note: newly injected pages
-   * may have no inlinks, so filter implementations may wish to set this 
-   * score to a non-zero value, to give newly injected pages some initial
-   * credit.
-   * @param url url of the page
-   * @param datum new datum. Filters will modify it in-place.
+   * may have no inlinks, so filter implementations may wish to set this score
+   * to a non-zero value, to give newly injected pages some initial credit.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          new datum. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException;
-  
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException;
+
   /**
-   * Set an initial score for newly discovered pages. Note: newly discovered pages
-   * have at least one inlink with its score contribution, so filter implementations
-   * may choose to set initial score to zero (unknown value), and then the inlink
-   * score contribution will set the "real" value of the new page.
-   * @param url url of the page
-   * @param datum new datum. Filters will modify it in-place.
+   * Set an initial score for newly discovered pages. Note: newly discovered
+   * pages have at least one inlink with its score contribution, so filter
+   * implementations may choose to set initial score to zero (unknown value),
+   * and then the inlink score contribution will set the "real" value of the new
+   * page.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          new datum. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException;
-  
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException;
+
   /**
-   * This method prepares a sort value for the purpose of sorting and
-   * selecting top N scoring pages during fetchlist generation.
-   * @param url url of the page
-   * @param datum page's datum, should not be modified
-   * @param initSort initial sort value, or a value from previous filters in chain
+   * This method prepares a sort value for the purpose of sorting and selecting
+   * top N scoring pages during fetchlist generation.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          page's datum, should not be modified
+   * @param initSort
+   *          initial sort value, or a value from previous filters in chain
    */
-  public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException;
-  
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException;
+
   /**
    * This method takes all relevant score information from the current datum
    * (coming from a generated fetchlist) and stores it into
-   * {@link org.apache.nutch.protocol.Content} metadata.
-   * This is needed in order to pass this value(s) to the mechanism that distributes it
-   * to outlinked pages.
-   * @param url url of the page
-   * @param datum source datum. NOTE: modifications to this value are not persisted.
-   * @param content instance of content. Implementations may modify this
-   * in-place, primarily by setting some metadata properties.
+   * {@link org.apache.nutch.protocol.Content} metadata. This is needed in order
+   * to pass this value(s) to the mechanism that distributes it to outlinked
+   * pages.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          source datum. NOTE: modifications to this value are not persisted.
+   * @param content
+   *          instance of content. Implementations may modify this in-place,
+   *          primarily by setting some metadata properties.
    */
-  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException;
-  
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException;
+
   /**
    * Currently a part of score distribution is performed using only data coming
    * from the parsing process. We need this method in order to ensure the
    * presence of score data in these steps.
-   * @param url page url
-   * @param content original content. NOTE: modifications to this value are not persisted.
-   * @param parse target instance to copy the score information to. Implementations
-   * may modify this in-place, primarily by setting some metadata properties.
+   * 
+   * @param url
+   *          page url
+   * @param content
+   *          original content. NOTE: modifications to this value are not
+   *          persisted.
+   * @param parse
+   *          target instance to copy the score information to. Implementations
+   *          may modify this in-place, primarily by setting some metadata
+   *          properties.
    */
-  public void passScoreAfterParsing(Text url, Content content, Parse parse) throws ScoringFilterException;
-  
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException;
+
   /**
    * Distribute score value from the current page to all its outlinked pages.
-   * @param fromUrl url of the source page
-   * @param parseData ParseData instance, which stores relevant score value(s)
-   * in its metadata. NOTE: filters may modify this in-place, all changes will
-   * be persisted.
-   * @param targets &lt;url, CrawlDatum&gt; pairs. NOTE: filters can modify this in-place,
-   * all changes will be persisted.
-   * @param adjust a CrawlDatum instance, initially null, which implementations
-   * may use to pass adjustment values to the original CrawlDatum. When creating
-   * this instance, set its status to {@link CrawlDatum#STATUS_LINKED}.
-   * @param allCount number of all collected outlinks from the source page
+   * 
+   * @param fromUrl
+   *          url of the source page
+   * @param parseData
+   *          ParseData instance, which stores relevant score value(s) in its
+   *          metadata. NOTE: filters may modify this in-place, all changes will
+   *          be persisted.
+   * @param targets
+   *          &lt;url, CrawlDatum&gt; pairs. NOTE: filters can modify this
+   *          in-place, all changes will be persisted.
+   * @param adjust
+   *          a CrawlDatum instance, initially null, which implementations may
+   *          use to pass adjustment values to the original CrawlDatum. When
+   *          creating this instance, set its status to
+   *          {@link CrawlDatum#STATUS_LINKED}.
+   * @param allCount
+   *          number of all collected outlinks from the source page
    * @return if needed, implementations may return an instance of CrawlDatum,
-   * with status {@link CrawlDatum#STATUS_LINKED}, which contains adjustments
-   * to be applied to the original CrawlDatum score(s) and metadata. This can
-   * be null if not needed.
+   *         with status {@link CrawlDatum#STATUS_LINKED}, which contains
+   *         adjustments to be applied to the original CrawlDatum score(s) and
+   *         metadata. This can be null if not needed.
    * @throws ScoringFilterException
    */
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, 
-          Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust,
-          int allCount) throws ScoringFilterException;
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException;
 
   /**
-   * This method calculates a new score of CrawlDatum during CrawlDb update, based on the
-   * initial value of the original CrawlDatum, and also score values contributed by
-   * inlinked pages.
-   * @param url url of the page
-   * @param old original datum, with original score. May be null if this is a newly
-   * discovered page. If not null, filters should use score values from this parameter
-   * as the starting values - the <code>datum</code> parameter may contain values that are
-   * no longer valid, if other updates occured between generation and this update.
-   * @param datum the new datum, with the original score saved at the time when
-   * fetchlist was generated. Filters should update this in-place, and it will be saved in
-   * the crawldb.
-   * @param inlinked (partial) list of CrawlDatum-s (with their scores) from
-   * links pointing to this page, found in the current update batch.
+   * This method calculates a new score of CrawlDatum during CrawlDb update,
+   * based on the initial value of the original CrawlDatum, and also score
+   * values contributed by inlinked pages.
+   * 
+   * @param url
+   *          url of the page
+   * @param old
+   *          original datum, with original score. May be null if this is a
+   *          newly discovered page. If not null, filters should use score
+   *          values from this parameter as the starting values - the
+   *          <code>datum</code> parameter may contain values that are no longer
+   *          valid, if other updates occurred between generation and this
+   *          update.
+   * @param datum
+   *          the new datum, with the original score saved at the time when
+   *          fetchlist was generated. Filters should update this in-place, and
+   *          it will be saved in the crawldb.
+   * @param inlinked
+   *          (partial) list of CrawlDatum-s (with their scores) from links
+   *          pointing to this page, found in the current update batch.
    * @throws ScoringFilterException
    */
-  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List<CrawlDatum> inlinked) throws ScoringFilterException;
-  
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException;
+
   /**
    * This method calculates a Lucene document boost.
-   * @param url url of the page
-   * @param doc Lucene document. NOTE: this already contains all information collected
-   * by indexing filters. Implementations may modify this instance, in order to store/remove
-   * some information.
-   * @param dbDatum current page from CrawlDb. NOTE: changes made to this instance
-   * are not persisted.
-   * @param fetchDatum datum from FetcherOutput (containing among others the fetching status)
-   * @param parse parsing result. NOTE: changes made to this instance are not persisted.
-   * @param inlinks current inlinks from LinkDb. NOTE: changes made to this instance are
-   * not persisted.
-   * @param initScore initial boost value for the Lucene document.
-   * @return boost value for the Lucene document. This value is passed as an argument
-   * to the next scoring filter in chain. NOTE: implementations may also express
-   * other scoring strategies by modifying Lucene document directly.
+   * 
+   * @param url
+   *          url of the page
+   * @param doc
+   *          Lucene document. NOTE: this already contains all information
+   *          collected by indexing filters. Implementations may modify this
+   *          instance, in order to store/remove some information.
+   * @param dbDatum
+   *          current page from CrawlDb. NOTE: changes made to this instance are
+   *          not persisted.
+   * @param fetchDatum
+   *          datum from FetcherOutput (containing among others the fetching
+   *          status)
+   * @param parse
+   *          parsing result. NOTE: changes made to this instance are not
+   *          persisted.
+   * @param inlinks
+   *          current inlinks from LinkDb. NOTE: changes made to this instance
+   *          are not persisted.
+   * @param initScore
+   *          initial boost value for the Lucene document.
+   * @return boost value for the Lucene document. This value is passed as an
+   *         argument to the next scoring filter in chain. NOTE: implementations
+   *         may also express other scoring strategies by modifying Lucene
+   *         document directly.
    * @throws ScoringFilterException
    */
   public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
-          CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException;
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException;
 }
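For reference, a minimal ScoringFilter sketch against the interface above. The class name and the use of the db.score.injected property are illustrative assumptions, not part of this patch; real implementations such as scoring-opic do considerably more.

import java.util.Collection;
import java.util.List;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;

/** Minimal sketch: every page keeps a flat score; inlinks simply add theirs. */
public class FlatScoringFilter implements ScoringFilter {

  private Configuration conf;
  private float injected = 1.0f;

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
    // Illustrative: read the initial credit for injected pages from configuration.
    injected = conf.getFloat("db.score.injected", 1.0f);
  }

  public void injectedScore(Text url, CrawlDatum datum)
      throws ScoringFilterException {
    datum.setScore(injected); // give newly injected pages some initial credit
  }

  public void initialScore(Text url, CrawlDatum datum)
      throws ScoringFilterException {
    datum.setScore(0.0f); // newly discovered pages start from their inlink contributions
  }

  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
      throws ScoringFilterException {
    return initSort * datum.getScore(); // sort fetchlists by current score
  }

  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
      throws ScoringFilterException {
    // this sketch carries no score metadata through parsing
  }

  public void passScoreAfterParsing(Text url, Content content, Parse parse)
      throws ScoringFilterException {
  }

  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
      CrawlDatum adjust, int allCount) throws ScoringFilterException {
    return adjust; // no adjustment to the original CrawlDatum
  }

  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
      List<CrawlDatum> inlinked) throws ScoringFilterException {
    float score = old != null ? old.getScore() : datum.getScore();
    for (CrawlDatum in : inlinked) {
      score += in.getScore(); // inlink scores accumulate onto the page
    }
    datum.setScore(score);
  }

  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
      throws ScoringFilterException {
    return initScore; // leave the document boost unchanged
  }
}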
Index: src/java/org/apache/nutch/scoring/ScoringFilters.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/ScoringFilters.java	(working copy)
@@ -51,7 +51,8 @@
     super(conf);
     ObjectCache objectCache = ObjectCache.get(conf);
     String order = conf.get("scoring.filter.order");
-    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,20 +61,23 @@
       }
 
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ScoringFilter.X_POINT_ID);
-        if (point == null) throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ScoringFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, ScoringFilter> filterMap =
-          new HashMap<String, ScoringFilter>();
+        HashMap<String, ScoringFilter> filterMap = new HashMap<String, ScoringFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ScoringFilter filter = (ScoringFilter) extension.getExtensionInstance();
+          ScoringFilter filter = (ScoringFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(filter.getClass().getName())) {
             filterMap.put(filter.getClass().getName(), filter);
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(ScoringFilter.class.getName(), filterMap.values().toArray(new ScoringFilter[0]));
+          objectCache.setObject(ScoringFilter.class.getName(), filterMap
+              .values().toArray(new ScoringFilter[0]));
         } else {
           ScoringFilter[] filter = new ScoringFilter[orderedFilters.length];
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -84,12 +88,14 @@
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+      this.filters = (ScoringFilter[]) objectCache
+          .getObject(ScoringFilter.class.getName());
     }
   }
 
   /** Calculate a sort value for Generate. */
-  public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException {
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       initSort = this.filters[i].generatorSortValue(url, datum, initSort);
     }
@@ -97,48 +103,59 @@
   }
 
   /** Calculate a new initial score, used when adding newly discovered pages. */
-  public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException {
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].initialScore(url, datum);
     }
   }
 
   /** Calculate a new initial score, used when injecting new pages. */
-  public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException {
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].injectedScore(url, datum);
     }
   }
 
   /** Calculate updated page score during CrawlDb.update(). */
-  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List<CrawlDatum> inlinked) throws ScoringFilterException {
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].updateDbScore(url, old, datum, inlinked);
     }
   }
 
-  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException {
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].passScoreBeforeParsing(url, datum, content);
     }
   }
-  
-  public void passScoreAfterParsing(Text url, Content content, Parse parse) throws ScoringFilterException {
+
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].passScoreAfterParsing(url, content, parse);
     }
   }
-  
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust, int allCount) throws ScoringFilterException {
+
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
-      adjust = this.filters[i].distributeScoreToOutlinks(fromUrl, parseData, targets, adjust, allCount);
+      adjust = this.filters[i].distributeScoreToOutlinks(fromUrl, parseData,
+          targets, adjust, allCount);
     }
     return adjust;
   }
 
-  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum, CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException {
+  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
-      initScore = this.filters[i].indexerScore(url, doc, dbDatum, fetchDatum, parse, inlinks, initScore);
+      initScore = this.filters[i].indexerScore(url, doc, dbDatum, fetchDatum,
+          parse, inlinks, initScore);
     }
     return initScore;
   }
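A short usage sketch of the chained filters, assuming the scoring-opic plugin is activated through plugin.includes; the value given for scoring.filter.order is only an example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.NutchConfiguration;

public class ScoringFiltersDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    // Filters listed here run in this order; other active filters follow in arbitrary order.
    conf.set("scoring.filter.order",
        "org.apache.nutch.scoring.opic.OPICScoringFilter");
    ScoringFilters filters = new ScoringFilters(conf);
    CrawlDatum datum = new CrawlDatum();
    filters.injectedScore(new Text("http://example.com/"), datum);
    System.out.println("injected score: " + datum.getScore());
  }
}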
Index: src/java/org/apache/nutch/net/protocols/ProtocolException.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/ProtocolException.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/protocols/ProtocolException.java	(working copy)
@@ -21,12 +21,12 @@
 
 /**
  * Base exception for all protocol handlers
+ * 
  * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead.
  */
 @SuppressWarnings("serial")
 public class ProtocolException extends Exception implements Serializable {
 
-
   public ProtocolException() {
     super();
   }
Index: src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(working copy)
@@ -26,15 +26,15 @@
 
 /**
  * class to handle HTTP dates.
- *
+ * 
  * Modified from FastHttpDateFormat.java in jakarta-tomcat.
- *
+ * 
  * @author John Xing
  */
 public class HttpDateFormat {
 
-  protected static SimpleDateFormat format = 
-    new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
+  protected static SimpleDateFormat format = new SimpleDateFormat(
+      "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
 
   /**
    * HTTP date uses TimeZone GMT
@@ -43,29 +43,29 @@
     format.setTimeZone(TimeZone.getTimeZone("GMT"));
   }
 
-  //HttpDate (long t) {
-  //}
+  // HttpDate (long t) {
+  // }
 
-  //HttpDate (String s) {
-  //}
+  // HttpDate (String s) {
+  // }
 
-//  /**
-//   * Get the current date in HTTP format.
-//   */
-//  public static String getCurrentDate() {
-//
-//    long now = System.currentTimeMillis();
-//    if ((now - currentDateGenerated) > 1000) {
-//        synchronized (format) {
-//            if ((now - currentDateGenerated) > 1000) {
-//                currentDateGenerated = now;
-//                currentDate = format.format(new Date(now));
-//            }
-//        }
-//    }
-//    return currentDate;
-//
-//  }
+  // /**
+  // * Get the current date in HTTP format.
+  // */
+  // public static String getCurrentDate() {
+  //
+  // long now = System.currentTimeMillis();
+  // if ((now - currentDateGenerated) > 1000) {
+  // synchronized (format) {
+  // if ((now - currentDateGenerated) > 1000) {
+  // currentDateGenerated = now;
+  // currentDate = format.format(new Date(now));
+  // }
+  // }
+  // }
+  // return currentDate;
+  //
+  // }
 
   /**
    * Get the HTTP format of the specified date.
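The formatter wrapped by this class is a plain SimpleDateFormat; the self-contained sketch below (JDK only, nothing from this patch) shows the RFC 1123-style output it standardizes on.

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

public class HttpDateDemo {
  public static void main(String[] args) throws Exception {
    // Same pattern and time zone as the formatter above.
    SimpleDateFormat fmt = new SimpleDateFormat(
        "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
    fmt.setTimeZone(TimeZone.getTimeZone("GMT"));
    String httpDate = fmt.format(new Date(0L));
    System.out.println(httpDate);            // Thu, 01 Jan 1970 00:00:00 GMT
    System.out.println(fmt.parse(httpDate).getTime()); // 0
  }
}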
Index: src/java/org/apache/nutch/net/protocols/Response.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/Response.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/protocols/Response.java	(working copy)
@@ -23,12 +23,11 @@
 import org.apache.nutch.metadata.HttpHeaders;
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
- * A response inteface.  Makes all protocols model HTTP.
+ * A response interface. Makes all protocols model HTTP.
  */
 public interface Response extends HttpHeaders {
-  
+
   /** Returns the URL used to retrieve this response. */
   public URL getUrl();
 
@@ -40,7 +39,7 @@
 
   /** Returns all the headers. */
   public Metadata getHeaders();
-  
+
   /** Returns the full content of the response. */
   public byte[] getContent();
 
Index: src/java/org/apache/nutch/net/URLNormalizer.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizer.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLNormalizer.java	(working copy)
@@ -21,13 +21,17 @@
 
 import org.apache.hadoop.conf.Configurable;
 
-/** Interface used to convert URLs to normal form and optionally perform substitutions */
+/**
+ * Interface used to convert URLs to normal form and optionally perform
+ * substitutions
+ */
 public interface URLNormalizer extends Configurable {
-  
+
   /* Extension ID */
   public static final String X_POINT_ID = URLNormalizer.class.getName();
-  
+
   /* Interface for URL normalization */
-  public String normalize(String urlString, String scope) throws MalformedURLException;
+  public String normalize(String urlString, String scope)
+      throws MalformedURLException;
 
 }
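A minimal normalizer sketch against this interface; the class name and the lower-casing behavior are illustrative only.

import java.net.MalformedURLException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLNormalizer;

public class LowercaseHostNormalizer implements URLNormalizer {
  private Configuration conf;

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  public String normalize(String urlString, String scope)
      throws MalformedURLException {
    URL u = new URL(urlString);
    // Rebuild the URL with a lower-cased protocol and host; leave path and query untouched.
    return u.getProtocol().toLowerCase() + "://" + u.getHost().toLowerCase()
        + (u.getPort() == -1 ? "" : ":" + u.getPort()) + u.getFile();
  }
}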
Index: src/java/org/apache/nutch/net/URLFilter.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLFilter.java	(working copy)
@@ -23,17 +23,18 @@
 // Nutch imports
 import org.apache.nutch.plugin.Pluggable;
 
-
 /**
- * Interface used to limit which URLs enter Nutch.
- * Used by the injector and the db updater.
+ * Interface used to limit which URLs enter Nutch. Used by the injector and the
+ * db updater.
  */
 
 public interface URLFilter extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = URLFilter.class.getName();
 
-  /* Interface for a filter that transforms a URL: it can pass the
-     original URL through or "delete" the URL by returning null */
+  /*
+   * Interface for a filter that transforms a URL: it can pass the original URL
+   * through or "delete" the URL by returning null
+   */
   public String filter(String urlString);
 }
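A minimal filter sketch against this interface; the class name and the http/https rule are illustrative only. Returning null "deletes" the URL, returning the string passes it through.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLFilter;

public class HttpOnlyURLFilter implements URLFilter {
  private Configuration conf;

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Pass http/https URLs through unchanged; drop everything else. */
  public String filter(String urlString) {
    if (urlString == null) {
      return null;
    }
    String lower = urlString.toLowerCase();
    return (lower.startsWith("http://") || lower.startsWith("https://"))
        ? urlString : null;
  }
}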
Index: src/java/org/apache/nutch/net/URLNormalizers.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizers.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLNormalizers.java	(working copy)
@@ -43,47 +43,63 @@
  * contexts where they are used (note however that they need to be activated
  * first through <tt>plugin.include</tt> property).
  * 
- * <p>There is one global scope defined by default, which consists of all
- * active normalizers. The order in which these normalizers
- * are executed may be defined in "urlnormalizer.order" property, which lists
- * space-separated implementation classes (if this property is missing normalizers
- * will be run in random order). If there are more
- * normalizers activated than explicitly named on this list, the remaining ones
- * will be run in random order after the ones specified on the list are executed.</p>
- * <p>You can define a set of contexts (or scopes) in which normalizers may be
+ * <p>
+ * There is one global scope defined by default, which consists of all active
+ * normalizers. The order in which these normalizers are executed may be defined
+ * in "urlnormalizer.order" property, which lists space-separated implementation
+ * classes (if this property is missing normalizers will be run in random
+ * order). If there are more normalizers activated than explicitly named on this
+ * list, the remaining ones will be run in random order after the ones specified
+ * on the list are executed.
+ * </p>
+ * <p>
+ * You can define a set of contexts (or scopes) in which normalizers may be
  * called. Each scope can have its own list of normalizers (defined in
  * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in
  * "urlnormalizer.order.<scope_name>" property). If any of these properties are
- * missing, default settings are used for the global scope.</p>
- * <p>In case no normalizers are required for any given scope, a
- * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should be used.</p>
- * <p>Each normalizer may further select among many configurations, depending on
- * the scope in which it is called, because the scope name is passed as a parameter
- * to each normalizer. You can also use the same normalizer for many scopes.</p>
- * <p>Several scopes have been defined, and various Nutch tools will attempt using
- * scope-specific normalizers first (and fall back to default config if scope-specific
- * configuration is missing).</p>
- * <p>Normalizers may be run several times, to ensure that modifications introduced
+ * missing, default settings are used for the global scope.
+ * </p>
+ * <p>
+ * In case no normalizers are required for any given scope, a
+ * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should
+ * be used.
+ * </p>
+ * <p>
+ * Each normalizer may further select among many configurations, depending on
+ * the scope in which it is called, because the scope name is passed as a
+ * parameter to each normalizer. You can also use the same normalizer for many
+ * scopes.
+ * </p>
+ * <p>
+ * Several scopes have been defined, and various Nutch tools will attempt using
+ * scope-specific normalizers first (and fall back to default config if
+ * scope-specific configuration is missing).
+ * </p>
+ * <p>
+ * Normalizers may be run several times, to ensure that modifications introduced
  * by normalizers at the end of the list can be further reduced by normalizers
- * executed at the beginning. By default this loop is executed just once - if you want
- * to ensure that all possible combinations have been applied you may want to run
- * this loop up to the number of activated normalizers. This loop count can be configured
- * through <tt>urlnormalizer.loop.count</tt> property. As soon as the url is
- * unchanged the loop will stop and return the result.</p>
+ * executed at the beginning. By default this loop is executed just once - if
+ * you want to ensure that all possible combinations have been applied you may
+ * want to run this loop up to the number of activated normalizers. This loop
+ * count can be configured through <tt>urlnormalizer.loop.count</tt> property.
+ * As soon as the url is unchanged the loop will stop and return the result.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
 public final class URLNormalizers {
-  
-  /** Default scope. If no scope properties are defined then the configuration for
-   * this scope will be used.
+
+  /**
+   * Default scope. If no scope properties are defined then the configuration
+   * for this scope will be used.
    */
   public static final String SCOPE_DEFAULT = "default";
   /** Scope used by {@link org.apache.nutch.crawl.URLPartitioner}. */
   public static final String SCOPE_PARTITION = "partition";
   /** Scope used by {@link org.apache.nutch.crawl.Generator}. */
   public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count";
-  /** Scope used by {@link org.apache.nutch.fetcher.Fetcher} when processing
+  /**
+   * Scope used by {@link org.apache.nutch.fetcher.Fetcher} when processing
    * redirect URLs.
    */
   public static final String SCOPE_FETCHER = "fetcher";
@@ -93,15 +109,18 @@
   public static final String SCOPE_LINKDB = "linkdb";
   /** Scope used by {@link org.apache.nutch.crawl.Injector}. */
   public static final String SCOPE_INJECT = "inject";
-  /** Scope used when constructing new {@link org.apache.nutch.parse.Outlink} instances. */
+  /**
+   * Scope used when constructing new {@link org.apache.nutch.parse.Outlink}
+   * instances.
+   */
   public static final String SCOPE_OUTLINK = "outlink";
-  
 
-  public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(URLNormalizers.class);
 
   /* Empty extension list for caching purposes. */
   private final List<Extension> EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
-  
+
   private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];
 
   private Configuration conf;
@@ -109,37 +128,39 @@
   private ExtensionPoint extensionPoint;
 
   private URLNormalizer[] normalizers;
-  
+
   private int loopCount;
 
   public URLNormalizers(Configuration conf, String scope) {
     this.conf = conf;
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
-            URLNormalizer.X_POINT_ID);
+        URLNormalizer.X_POINT_ID);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (this.extensionPoint == null) {
       throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID
-              + " not found.");
+          + " not found.");
     }
 
-    normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
+    normalizers = (URLNormalizer[]) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_" + scope);
     if (normalizers == null) {
       normalizers = getURLNormalizers(scope);
     }
     if (normalizers == EMPTY_NORMALIZERS) {
-      normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
+      normalizers = (URLNormalizer[]) objectCache
+          .getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
       if (normalizers == null) {
         normalizers = getURLNormalizers(SCOPE_DEFAULT);
       }
     }
-    
+
     loopCount = conf.getInt("urlnormalizer.loop.count", 1);
   }
 
   /**
-   * Function returns an array of {@link URLNormalizer}s for a given scope,
-   * with a specified order.
+   * Function returns an array of {@link URLNormalizer}s for a given scope, with
+   * a specified order.
    * 
    * @param scope
    *          The scope to return the <code>Array</code> of
@@ -151,13 +172,14 @@
   URLNormalizer[] getURLNormalizers(String scope) {
     List<Extension> extensions = getExtensions(scope);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (extensions == EMPTY_EXTENSION_LIST) {
       return EMPTY_NORMALIZERS;
     }
-    
-    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());
 
+    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(
+        extensions.size());
+
     Iterator<Extension> it = extensions.iterator();
     while (it.hasNext()) {
       Extension ext = it.next();
@@ -174,14 +196,13 @@
       } catch (PluginRuntimeException e) {
         e.printStackTrace();
         LOG.warn("URLNormalizers:PluginRuntimeException when "
-                + "initializing url normalizer plugin "
-                + ext.getDescriptor().getPluginId()
-                + " instance in getURLNormalizers "
-                + "function: attempting to continue instantiating plugins");
+            + "initializing url normalizer plugin "
+            + ext.getDescriptor().getPluginId()
+            + " instance in getURLNormalizers "
+            + "function: attempting to continue instantiating plugins");
       }
     }
-    return normalizers.toArray(new URLNormalizer[normalizers
-            .size()]);
+    return normalizers.toArray(new URLNormalizer[normalizers.size()]);
   }
 
   /**
@@ -195,9 +216,8 @@
    */
   private List<Extension> getExtensions(String scope) {
     ObjectCache objectCache = ObjectCache.get(conf);
-    List<Extension> extensions = 
-      (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_"
-                                                + scope);
+    List<Extension> extensions = (List<Extension>) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_x_" + scope);
 
     // Just compare the reference:
     // if this is the empty list, we know we will find no extension.
@@ -208,11 +228,13 @@
     if (extensions == null) {
       extensions = findExtensions(scope);
       if (extensions != null) {
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            extensions);
       } else {
         // Put the empty extension list into cache
         // to remember we don't know any related extension.
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            EMPTY_EXTENSION_LIST);
         extensions = EMPTY_EXTENSION_LIST;
       }
     }
@@ -232,7 +254,8 @@
 
     String[] orders = null;
     String orderlist = conf.get("urlnormalizer.order." + scope);
-    if (orderlist == null) orderlist = conf.get("urlnormalizer.order");
+    if (orderlist == null)
+      orderlist = conf.get("urlnormalizer.order");
     if (orderlist != null && !orderlist.trim().equals("")) {
       orders = orderlist.split("\\s+");
     }
@@ -270,13 +293,17 @@
 
   /**
    * Normalize
-   * @param urlString The URL string to normalize.
-   * @param scope The given scope.
+   * 
+   * @param urlString
+   *          The URL string to normalize.
+   * @param scope
+   *          The given scope.
    * @return A normalized String, using the given <code>scope</code>
-   * @throws MalformedURLException If the given URL string is malformed.
+   * @throws MalformedURLException
+   *           If the given URL string is malformed.
    */
   public String normalize(String urlString, String scope)
-          throws MalformedURLException {
+      throws MalformedURLException {
     // optionally loop several times, and break if no further changes
     String initialString = urlString;
     for (int k = 0; k < loopCount; k++) {
@@ -285,7 +312,8 @@
           return null;
         urlString = this.normalizers[i].normalize(urlString, scope);
       }
-      if (initialString.equals(urlString)) break;
+      if (initialString.equals(urlString))
+        break;
       initialString = urlString;
     }
     return urlString;
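A usage sketch for the class above, assuming at least one urlnormalizer plugin is activated via plugin.includes; the loop-count override simply exercises the property described in the Javadoc.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.util.NutchConfiguration;

public class NormalizeOne {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    // Run the normalizer chain twice so later normalizers can act on earlier output.
    conf.setInt("urlnormalizer.loop.count", 2);
    URLNormalizers normalizers = new URLNormalizers(conf,
        URLNormalizers.SCOPE_DEFAULT);
    System.out.println(normalizers.normalize(
        "HTTP://Example.COM//a/../b.html", URLNormalizers.SCOPE_DEFAULT));
  }
}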
Index: src/java/org/apache/nutch/net/URLNormalizerChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizerChecker.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLNormalizerChecker.java	(working copy)
@@ -36,23 +36,23 @@
   private Configuration conf;
 
   public URLNormalizerChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String normalizerName, String scope) throws Exception {
     URLNormalizer normalizer = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLNormalizer.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLNormalizer.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLNormalizer.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      normalizer = (URLNormalizer)extension.getExtensionInstance();
+      normalizer = (URLNormalizer) extension.getExtensionInstance();
       if (normalizer.getClass().getName().equals(normalizerName)) {
         break;
       } else {
@@ -61,7 +61,8 @@
     }
 
     if (normalizer == null)
-      throw new RuntimeException("URLNormalizer "+normalizerName+" not found.");
+      throw new RuntimeException("URLNormalizer " + normalizerName
+          + " not found.");
 
     System.out.println("Checking URLNormalizer " + normalizerName);
 
@@ -79,7 +80,7 @@
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
     URLNormalizers normalizers = new URLNormalizers(conf, scope);
-    while((line = in.readLine()) != null) {
+    while ((line = in.readLine()) != null) {
       String out = normalizers.normalize(line, scope);
       System.out.println(out);
     }
@@ -88,7 +89,7 @@
   public static void main(String[] args) throws Exception {
 
     String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]"
-      + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
+        + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
 
     String normalizerName = null;
     String scope = URLNormalizers.SCOPE_DEFAULT;
@@ -103,7 +104,8 @@
       }
     }
 
-    URLNormalizerChecker checker = new URLNormalizerChecker(NutchConfiguration.create());
+    URLNormalizerChecker checker = new URLNormalizerChecker(
+        NutchConfiguration.create());
     if (normalizerName != null) {
       checker.checkOne(normalizerName, scope);
     } else {
Index: src/java/org/apache/nutch/net/URLFilters.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLFilters.java	(working copy)
@@ -28,7 +28,8 @@
 import org.apache.nutch.util.ObjectCache;
 
 import org.apache.hadoop.conf.Configuration;
-/** Creates and caches {@link URLFilter} implementing plugins.*/
+
+/** Creates and caches {@link URLFilter} implementing plugins. */
 public class URLFilters {
 
   public static final String URLFILTER_ORDER = "urlfilter.order";
@@ -37,7 +38,8 @@
   public URLFilters(Configuration conf) {
     String order = conf.get(URLFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,8 +62,8 @@
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(URLFilter.class.getName(), filterMap.values().toArray(
-              new URLFilter[0]));
+          objectCache.setObject(URLFilter.class.getName(), filterMap.values()
+              .toArray(new URLFilter[0]));
         } else {
           ArrayList<URLFilter> filters = new ArrayList<URLFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -70,13 +72,14 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(URLFilter.class.getName(), filters
-              .toArray(new URLFilter[filters.size()]));
+          objectCache.setObject(URLFilter.class.getName(),
+              filters.toArray(new URLFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+          .getName());
     }
   }
 
Index: src/java/org/apache/nutch/net/URLFilterChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilterChecker.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLFilterChecker.java	(working copy)
@@ -38,23 +38,23 @@
   private Configuration conf;
 
   public URLFilterChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String filterName) throws Exception {
     URLFilter filter = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLFilter.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLFilter.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLFilter.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLFilter.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      filter = (URLFilter)extension.getExtensionInstance();
+      filter = (URLFilter) extension.getExtensionInstance();
       if (filter.getClass().getName().equals(filterName)) {
         break;
       } else {
@@ -63,19 +63,19 @@
     }
 
     if (filter == null)
-      throw new RuntimeException("Filter "+filterName+" not found.");
+      throw new RuntimeException("Filter " + filterName + " not found.");
 
     // jerome : should we keep this behavior?
-    //if (LogFormatter.hasLoggedSevere())
-    //  throw new RuntimeException("Severe error encountered.");
+    // if (LogFormatter.hasLoggedSevere())
+    // throw new RuntimeException("Severe error encountered.");
 
-    System.out.println("Checking URLFilter "+filterName);
+    System.out.println("Checking URLFilter " + filterName);
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
-      String out=filter.filter(line);
-      if(out!=null) {
+    while ((line = in.readLine()) != null) {
+      String out = filter.filter(line);
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
@@ -90,10 +90,10 @@
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
+    while ((line = in.readLine()) != null) {
       URLFilters filters = new URLFilters(this.conf);
       String out = filters.filter(line);
-      if(out!=null) {
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
Index: src/java/org/apache/nutch/crawl/Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/Signature.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Signature.java	(working copy)
@@ -24,7 +24,7 @@
 
 public abstract class Signature implements Configurable {
   protected Configuration conf;
-  
+
   public abstract byte[] calculate(Content content, Parse parse);
 
   public Configuration getConf() {
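A minimal Signature sketch: digesting the raw fetched bytes, so identical content yields identical signatures. The class name is illustrative; Nutch ships comparable implementations (e.g. MD5Signature).

import java.security.MessageDigest;

import org.apache.nutch.crawl.Signature;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.protocol.Content;

public class Md5ContentSignature extends Signature {
  /** Digest the raw fetched bytes; the parse result is ignored in this sketch. */
  public byte[] calculate(Content content, Parse parse) {
    try {
      return MessageDigest.getInstance("MD5").digest(content.getContent());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}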
Index: src/java/org/apache/nutch/crawl/CrawlDbReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbReader.java	(working copy)
@@ -69,68 +69,76 @@
 public class CrawlDbReader implements Closeable {
 
   public static final Logger LOG = LoggerFactory.getLogger(CrawlDbReader.class);
-  
+
   public static final int STD_FORMAT = 0;
   public static final int CSV_FORMAT = 1;
-    
+
   private MapFile.Reader[] readers = null;
-  
-  private void openReaders(String crawlDb, Configuration config) throws IOException {
-    if (readers != null) return;
+
+  private void openReaders(String crawlDb, Configuration config)
+      throws IOException {
+    if (readers != null)
+      return;
     FileSystem fs = FileSystem.get(config);
     readers = MapFileOutputFormat.getReaders(fs, new Path(crawlDb,
         CrawlDb.CURRENT_NAME), config);
   }
-  
+
   private void closeReaders() {
-    if (readers == null) return;
+    if (readers == null)
+      return;
     for (int i = 0; i < readers.length; i++) {
       try {
         readers[i].close();
       } catch (Exception e) {
-        
+
       }
     }
   }
-  
-  public static class CrawlDatumCsvOutputFormat extends FileOutputFormat<Text,CrawlDatum> {
-    protected static class LineRecordWriter implements RecordWriter<Text,CrawlDatum> {
+
+  public static class CrawlDatumCsvOutputFormat extends
+      FileOutputFormat<Text, CrawlDatum> {
+    protected static class LineRecordWriter implements
+        RecordWriter<Text, CrawlDatum> {
       private DataOutputStream out;
 
       public LineRecordWriter(DataOutputStream out) {
         this.out = out;
         try {
           out.writeBytes("Url;Status code;Status name;Fetch Time;Modified Time;Retries since fetch;Retry interval;Score;Signature;Metadata\n");
-        } catch (IOException e) {}
+        } catch (IOException e) {
+        }
       }
 
-      public synchronized void write(Text key, CrawlDatum value) throws IOException {
-          out.writeByte('"');
-          out.writeBytes(key.toString());
-          out.writeByte('"');
-          out.writeByte(';');
-          out.writeBytes(Integer.toString(value.getStatus()));
-          out.writeByte(';');
-          out.writeByte('"');
-          out.writeBytes(CrawlDatum.getStatusName(value.getStatus()));
-          out.writeByte('"');
-          out.writeByte(';');
-          out.writeBytes(new Date(value.getFetchTime()).toString());
-          out.writeByte(';');
-          out.writeBytes(new Date(value.getModifiedTime()).toString());
-          out.writeByte(';');
-          out.writeBytes(Integer.toString(value.getRetriesSinceFetch()));
-          out.writeByte(';');
-          out.writeBytes(Float.toString(value.getFetchInterval()));
-          out.writeByte(';');
-          out.writeBytes(Float.toString((value.getFetchInterval() / FetchSchedule.SECONDS_PER_DAY)));
-          out.writeByte(';');
-          out.writeBytes(Float.toString(value.getScore()));
-          out.writeByte(';');
-          out.writeByte('"');
-          out.writeBytes(value.getSignature() != null ? StringUtil.toHexString(value.getSignature()): "null");
-          out.writeByte('"');
-          out.writeByte('\n');
+      public synchronized void write(Text key, CrawlDatum value)
+          throws IOException {
+        out.writeByte('"');
+        out.writeBytes(key.toString());
+        out.writeByte('"');
+        out.writeByte(';');
+        out.writeBytes(Integer.toString(value.getStatus()));
+        out.writeByte(';');
+        out.writeByte('"');
+        out.writeBytes(CrawlDatum.getStatusName(value.getStatus()));
+        out.writeByte('"');
+        out.writeByte(';');
+        out.writeBytes(new Date(value.getFetchTime()).toString());
+        out.writeByte(';');
+        out.writeBytes(new Date(value.getModifiedTime()).toString());
+        out.writeByte(';');
+        out.writeBytes(Integer.toString(value.getRetriesSinceFetch()));
+        out.writeByte(';');
+        out.writeBytes(Float.toString(value.getFetchInterval()));
+        out.writeByte(';');
+        out.writeBytes(Float.toString((value.getFetchInterval() / FetchSchedule.SECONDS_PER_DAY)));
+        out.writeByte(';');
+        out.writeBytes(Float.toString(value.getScore()));
+        out.writeByte(';');
+        out.writeByte('"');
+        out.writeBytes(value.getSignature() != null ? StringUtil
+            .toHexString(value.getSignature()) : "null");
+        out.writeByte('"');
+        out.writeByte('\n');
       }
 
       public synchronized void close(Reporter reporter) throws IOException {
@@ -138,48 +146,65 @@
       }
     }
 
-    public RecordWriter<Text,CrawlDatum> getRecordWriter(FileSystem fs, JobConf job, String name,
-        Progressable progress) throws IOException {
+    public RecordWriter<Text, CrawlDatum> getRecordWriter(FileSystem fs,
+        JobConf job, String name, Progressable progress) throws IOException {
       Path dir = FileOutputFormat.getOutputPath(job);
       DataOutputStream fileOut = fs.create(new Path(dir, name), progress);
       return new LineRecordWriter(fileOut);
-   }
+    }
   }
 
-  public static class CrawlDbStatMapper implements Mapper<Text, CrawlDatum, Text, LongWritable> {
+  public static class CrawlDbStatMapper implements
+      Mapper<Text, CrawlDatum, Text, LongWritable> {
     LongWritable COUNT_1 = new LongWritable(1);
     private boolean sort = false;
+
     public void configure(JobConf job) {
-      sort = job.getBoolean("db.reader.stats.sort", false );
+      sort = job.getBoolean("db.reader.stats.sort", false);
     }
-    public void close() {}
-    public void map(Text key, CrawlDatum value, OutputCollector<Text, LongWritable> output, Reporter reporter)
-            throws IOException {
+
+    public void close() {
+    }
+
+    public void map(Text key, CrawlDatum value,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
       output.collect(new Text("T"), COUNT_1);
       output.collect(new Text("status " + value.getStatus()), COUNT_1);
-      output.collect(new Text("retry " + value.getRetriesSinceFetch()), COUNT_1);
-      output.collect(new Text("s"), new LongWritable((long) (value.getScore() * 1000.0)));
-      if(sort){
+      output
+          .collect(new Text("retry " + value.getRetriesSinceFetch()), COUNT_1);
+      output.collect(new Text("s"), new LongWritable(
+          (long) (value.getScore() * 1000.0)));
+      if (sort) {
         URL u = new URL(key.toString());
         String host = u.getHost();
-        output.collect(new Text("status " + value.getStatus() + " " + host), COUNT_1);
+        output.collect(new Text("status " + value.getStatus() + " " + host),
+            COUNT_1);
       }
     }
   }
-  
-  public static class CrawlDbStatCombiner implements Reducer<Text, LongWritable, Text, LongWritable> {
+
+  public static class CrawlDbStatCombiner implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
     LongWritable val = new LongWritable();
-    
-    public CrawlDbStatCombiner() { }
-    public void configure(JobConf job) { }
-    public void close() {}
-    public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> output, Reporter reporter)
+
+    public CrawlDbStatCombiner() {
+    }
+
+    public void configure(JobConf job) {
+    }
+
+    public void close() {
+    }
+
+    public void reduce(Text key, Iterator<LongWritable> values,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
         throws IOException {
       val.set(0L);
-      String k = ((Text)key).toString();
+      String k = ((Text) key).toString();
       if (!k.equals("s")) {
         while (values.hasNext()) {
-          LongWritable cnt = (LongWritable)values.next();
+          LongWritable cnt = (LongWritable) values.next();
           val.set(val.get() + cnt.get());
         }
         output.collect(key, val);
@@ -188,9 +213,11 @@
         long min = Long.MAX_VALUE;
         long max = Long.MIN_VALUE;
         while (values.hasNext()) {
-          LongWritable cnt = (LongWritable)values.next();
-          if (cnt.get() < min) min = cnt.get();
-          if (cnt.get() > max) max = cnt.get();
+          LongWritable cnt = (LongWritable) values.next();
+          if (cnt.get() < min)
+            min = cnt.get();
+          if (cnt.get() > max)
+            max = cnt.get();
           total += cnt.get();
         }
         output.collect(new Text("scn"), new LongWritable(min));
@@ -200,12 +227,18 @@
     }
   }
 
-  public static class CrawlDbStatReducer implements Reducer<Text, LongWritable, Text, LongWritable> {
-    public void configure(JobConf job) {}
-    public void close() {}
-    public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> output, Reporter reporter)
-            throws IOException {
+  public static class CrawlDbStatReducer implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
+    public void configure(JobConf job) {
+    }
 
+    public void close() {
+    }
+
+    public void reduce(Text key, Iterator<LongWritable> values,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
+
       String k = ((Text) key).toString();
       if (k.equals("T")) {
         // sum all values for this key
@@ -218,28 +251,30 @@
       } else if (k.startsWith("status") || k.startsWith("retry")) {
         LongWritable cnt = new LongWritable();
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
+          LongWritable val = (LongWritable) values.next();
           cnt.set(cnt.get() + val.get());
         }
         output.collect(key, cnt);
       } else if (k.equals("scx")) {
         LongWritable cnt = new LongWritable(Long.MIN_VALUE);
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
-          if (cnt.get() < val.get()) cnt.set(val.get());
+          LongWritable val = (LongWritable) values.next();
+          if (cnt.get() < val.get())
+            cnt.set(val.get());
         }
         output.collect(key, cnt);
       } else if (k.equals("scn")) {
         LongWritable cnt = new LongWritable(Long.MAX_VALUE);
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
-          if (cnt.get() > val.get()) cnt.set(val.get());
+          LongWritable val = (LongWritable) values.next();
+          if (cnt.get() > val.get())
+            cnt.set(val.get());
         }
         output.collect(key, cnt);
       } else if (k.equals("sct")) {
         LongWritable cnt = new LongWritable();
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
+          LongWritable val = (LongWritable) values.next();
           cnt.set(cnt.get() + val.get());
         }
         output.collect(key, cnt);
@@ -247,30 +282,39 @@
     }
   }
 
-  public static class CrawlDbTopNMapper implements Mapper<Text, CrawlDatum, FloatWritable, Text> {
+  public static class CrawlDbTopNMapper implements
+      Mapper<Text, CrawlDatum, FloatWritable, Text> {
     private static final FloatWritable fw = new FloatWritable();
     private float min = 0.0f;
-    
+
     public void configure(JobConf job) {
       long lmin = job.getLong("db.reader.topn.min", 0);
       if (lmin != 0) {
-        min = (float)lmin / 1000000.0f;
+        min = (float) lmin / 1000000.0f;
       }
     }
-    public void close() {}
-    public void map(Text key, CrawlDatum value, OutputCollector<FloatWritable, Text> output, Reporter reporter)
-            throws IOException {
-      if (value.getScore() < min) return; // don't collect low-scoring records
+
+    public void close() {
+    }
+
+    public void map(Text key, CrawlDatum value,
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
+      if (value.getScore() < min)
+        return; // don't collect low-scoring records
       fw.set(-value.getScore()); // reverse sorting order
       output.collect(fw, key); // invert mapping: score -> url
     }
   }
-  
-  public static class CrawlDbTopNReducer implements Reducer<FloatWritable, Text, FloatWritable, Text> {
+
+  public static class CrawlDbTopNReducer implements
+      Reducer<FloatWritable, Text, FloatWritable, Text> {
     private long topN;
     private long count = 0L;
-    
-    public void reduce(FloatWritable key, Iterator<Text> values, OutputCollector<FloatWritable, Text> output, Reporter reporter) throws IOException {
+
+    public void reduce(FloatWritable key, Iterator<Text> values,
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
       while (values.hasNext() && count < topN) {
         key.set(-key.get());
         output.collect(key, values.next());
@@ -281,20 +325,22 @@
     public void configure(JobConf job) {
       topN = job.getLong("db.reader.topn", 100) / job.getNumReduceTasks();
     }
-    
-    public void close() {}
+
+    public void close() {
+    }
   }
 
   public void close() {
     closeReaders();
   }
-  
-  public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException {
 
+  public void processStatJob(String crawlDb, Configuration config, boolean sort)
+      throws IOException {
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb statistics start: " + crawlDb);
     }
-    
+
     Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());
 
     JobConf job = new NutchJob(config);
@@ -320,7 +366,8 @@
 
     // reading the result
     FileSystem fileSystem = FileSystem.get(config);
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config, tmpFolder);
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config,
+        tmpFolder);
 
     Text key = new Text();
     LongWritable value = new LongWritable();
@@ -333,21 +380,25 @@
         LongWritable val = stats.get(k);
         if (val == null) {
           val = new LongWritable();
-          if (k.equals("scx")) val.set(Long.MIN_VALUE);
-          if (k.equals("scn")) val.set(Long.MAX_VALUE);
+          if (k.equals("scx"))
+            val.set(Long.MIN_VALUE);
+          if (k.equals("scn"))
+            val.set(Long.MAX_VALUE);
           stats.put(k, val);
         }
         if (k.equals("scx")) {
-          if (val.get() < value.get()) val.set(value.get());
+          if (val.get() < value.get())
+            val.set(value.get());
         } else if (k.equals("scn")) {
-          if (val.get() > value.get()) val.set(value.get());          
+          if (val.get() > value.get())
+            val.set(value.get());
         } else {
           val.set(val.get() + value.get());
         }
       }
       reader.close();
     }
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("Statistics for CrawlDb: " + crawlDb);
       LongWritable totalCnt = stats.get("T");
@@ -361,31 +412,40 @@
         } else if (k.equals("scx")) {
           LOG.info("max score:\t" + (float) (val.get() / 1000.0f));
         } else if (k.equals("sct")) {
-          LOG.info("avg score:\t" + (float) ((((double)val.get()) / totalCnt.get()) / 1000.0));
+          LOG.info("avg score:\t"
+              + (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
         } else if (k.startsWith("status")) {
           String[] st = k.split(" ");
           int code = Integer.parseInt(st[1]);
-          if(st.length >2 ) LOG.info("   " + st[2] +" :\t" + val);
-          else LOG.info(st[0] +" " +code + " (" + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
-        } else LOG.info(k + ":\t" + val);
+          if (st.length > 2)
+            LOG.info("   " + st[2] + " :\t" + val);
+          else
+            LOG.info(st[0] + " " + code + " ("
+                + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
+        } else
+          LOG.info(k + ":\t" + val);
       }
     }
     // removing the tmp folder
     fileSystem.delete(tmpFolder, true);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb statistics: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("CrawlDb statistics: done");
+    }
 
   }
-  
-  public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
+
+  public CrawlDatum get(String crawlDb, String url, Configuration config)
+      throws IOException {
     Text key = new Text(url);
     CrawlDatum val = new CrawlDatum();
     openReaders(crawlDb, config);
-    CrawlDatum res = (CrawlDatum)MapFileOutputFormat.getEntry(readers,
+    CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
         new HashPartitioner<Text, CrawlDatum>(), key, val);
     return res;
   }
 
-  public void readUrl(String crawlDb, String url, Configuration config) throws IOException {
+  public void readUrl(String crawlDb, String url, Configuration config)
+      throws IOException {
     CrawlDatum res = get(crawlDb, url, config);
     System.out.println("URL: " + url);
     if (res != null) {
@@ -394,14 +454,15 @@
       System.out.println("not found");
     }
   }
-  
-  public void processDumpJob(String crawlDb, String output, Configuration config, int format) throws IOException {
 
+  public void processDumpJob(String crawlDb, String output,
+      Configuration config, int format) throws IOException {
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb dump: starting");
       LOG.info("CrawlDb db: " + crawlDb);
     }
-    
+
     Path outFolder = new Path(output);
 
     JobConf job = new NutchJob(config);
@@ -411,27 +472,31 @@
     job.setInputFormat(SequenceFileInputFormat.class);
 
     FileOutputFormat.setOutputPath(job, outFolder);
-    if(format == CSV_FORMAT) job.setOutputFormat(CrawlDatumCsvOutputFormat.class);
-    else job.setOutputFormat(TextOutputFormat.class);
+    if (format == CSV_FORMAT)
+      job.setOutputFormat(CrawlDatumCsvOutputFormat.class);
+    else
+      job.setOutputFormat(TextOutputFormat.class);
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(CrawlDatum.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb dump: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("CrawlDb dump: done");
+    }
   }
 
-  public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException {
-    
+  public void processTopNJob(String crawlDb, long topN, float min,
+      String output, Configuration config) throws IOException {
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
       LOG.info("CrawlDb db: " + crawlDb);
     }
-    
+
     Path outFolder = new Path(output);
-    Path tempDir =
-      new Path(config.get("mapred.temp.dir", ".") +
-               "/readdb-topN-temp-"+
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path tempDir = new Path(config.get("mapred.temp.dir", ".")
+        + "/readdb-topN-temp-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("topN prepare " + crawlDb);
@@ -447,8 +512,8 @@
 
     // XXX hmmm, no setFloat() in the API ... :(
     job.setLong("db.reader.topn.min", Math.round(1000000.0 * min));
-    JobClient.runJob(job); 
-    
+    JobClient.runJob(job);
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb topN: collecting topN scores.");
     }
@@ -471,7 +536,9 @@
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(config);
     fs.delete(tempDir, true);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb topN: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("CrawlDb topN: done");
+    }
 
   }
 
@@ -479,16 +546,24 @@
     CrawlDbReader dbr = new CrawlDbReader();
 
     if (args.length < 1) {
-      System.err.println("Usage: CrawlDbReader <crawldb> (-stats | -dump <out_dir> | -topN <nnnn> <out_dir> [<min>] | -url <url>)");
-      System.err.println("\t<crawldb>\tdirectory name where crawldb is located");
-      System.err.println("\t-stats [-sort] \tprint overall statistics to System.out");
+      System.err
+          .println("Usage: CrawlDbReader <crawldb> (-stats | -dump <out_dir> | -topN <nnnn> <out_dir> [<min>] | -url <url>)");
+      System.err
+          .println("\t<crawldb>\tdirectory name where crawldb is located");
+      System.err
+          .println("\t-stats [-sort] \tprint overall statistics to System.out");
       System.err.println("\t\t[-sort]\tlist status sorted by host");
-      System.err.println("\t-dump <out_dir> [-format normal|csv ]\tdump the whole db to a text file in <out_dir>");
+      System.err
+          .println("\t-dump <out_dir> [-format normal|csv ]\tdump the whole db to a text file in <out_dir>");
       System.err.println("\t\t[-format csv]\tdump in Csv format");
-      System.err.println("\t\t[-format normal]\tdump in standard format (default option)");
-      System.err.println("\t-url <url>\tprint information on <url> to System.out");
-      System.err.println("\t-topN <nnnn> <out_dir> [<min>]\tdump top <nnnn> urls sorted by score to <out_dir>");
-      System.err.println("\t\t[<min>]\tskip records with scores below this value.");
+      System.err
+          .println("\t\t[-format normal]\tdump in standard format (default option)");
+      System.err
+          .println("\t-url <url>\tprint information on <url> to System.out");
+      System.err
+          .println("\t-topN <nnnn> <out_dir> [<min>]\tdump top <nnnn> urls sorted by score to <out_dir>");
+      System.err
+          .println("\t\t[<min>]\tskip records with scores below this value.");
       System.err.println("\t\t\tThis can significantly improve performance.");
       return;
     }
@@ -498,7 +573,7 @@
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-stats")) {
         boolean toSort = false;
-        if(i < args.length - 1 && "-sort".equals(args[i+1])){
+        if (i < args.length - 1 && "-sort".equals(args[i + 1])) {
           toSort = true;
           i++;
         }
@@ -506,9 +581,10 @@
       } else if (args[i].equals("-dump")) {
         param = args[++i];
         String format = "normal";
-        if(i < args.length - 1 &&  "-format".equals(args[i+1]))
-          format = args[i=i+2];
-        dbr.processDumpJob(crawlDb, param, conf, "csv".equals(format)? CSV_FORMAT : STD_FORMAT );
+        if (i < args.length - 1 && "-format".equals(args[i + 1]))
+          format = args[i = i + 2];
+        dbr.processDumpJob(crawlDb, param, conf,
+            "csv".equals(format) ? CSV_FORMAT : STD_FORMAT);
       } else if (args[i].equals("-url")) {
         param = args[++i];
         dbr.readUrl(crawlDb, param, conf);
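
For reference, a minimal sketch of driving the CrawlDbReader entry points touched above (processStatJob, processTopNJob, readUrl) programmatically rather than through the command line. This is an illustration only: the crawldb and output paths are hypothetical, and the method signatures are the ones shown in the hunks above.

// Illustrative sketch, not part of this patch. Paths are hypothetical;
// method signatures follow the hunks above.
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.CrawlDbReader;
import org.apache.nutch.util.NutchConfiguration;

public class CrawlDbReaderExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    CrawlDbReader dbr = new CrawlDbReader();
    // overall statistics, unsorted (the -stats option of the usage message)
    dbr.processStatJob("crawl/crawldb", conf, false);
    // top 1000 URLs by score, skipping records with scores below 0.1 (-topN)
    dbr.processTopNJob("crawl/crawldb", 1000L, 0.1f, "crawldb-top1000", conf);
    // details for a single URL (-url)
    dbr.readUrl("crawl/crawldb", "http://example.org/", conf);
    dbr.close();
  }
}

The calls map one-to-one onto the -stats, -topN and -url options of the usage message above.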
Index: src/java/org/apache/nutch/crawl/LinkDb.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDb.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDb.java	(working copy)
@@ -43,7 +43,8 @@
 import org.apache.nutch.util.TimingUtil;
 
 /** Maintains an inverted link map, listing incoming links for each url. */
-public class LinkDb extends Configured implements Tool, Mapper<Text, ParseData, Text, Inlinks> {
+public class LinkDb extends Configured implements Tool,
+    Mapper<Text, ParseData, Text, Inlinks> {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDb.class);
 
@@ -54,13 +55,14 @@
   private boolean ignoreInternalLinks;
   private URLFilters urlFilters;
   private URLNormalizers urlNormalizers;
-  
-  public LinkDb() {}
-  
+
+  public LinkDb() {
+  }
+
   public LinkDb(Configuration conf) {
     setConf(conf);
   }
-  
+
   public void configure(JobConf job) {
     maxAnchorLength = job.getInt("db.max.anchor.length", 100);
     ignoreInternalLinks = job.getBoolean("db.ignore.internal.links", true);
@@ -72,16 +74,19 @@
     }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public void map(Text key, ParseData parseData,
-                  OutputCollector<Text, Inlinks> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, Inlinks> output, Reporter reporter)
+      throws IOException {
     String fromUrl = key.toString();
     String fromHost = getHost(fromUrl);
     if (urlNormalizers != null) {
       try {
-        fromUrl = urlNormalizers.normalize(fromUrl, URLNormalizers.SCOPE_LINKDB); // normalize the url
+        // normalize the url
+        fromUrl = urlNormalizers.normalize(fromUrl,
+            URLNormalizers.SCOPE_LINKDB);
       } catch (Exception e) {
         LOG.warn("Skipping " + fromUrl + ":" + e);
         fromUrl = null;
@@ -95,7 +100,8 @@
         fromUrl = null;
       }
     }
-    if (fromUrl == null) return; // discard all outlinks
+    if (fromUrl == null)
+      return; // discard all outlinks
     Outlink[] outlinks = parseData.getOutlinks();
     Inlinks inlinks = new Inlinks();
     for (int i = 0; i < outlinks.length; i++) {
@@ -105,12 +111,14 @@
       if (ignoreInternalLinks) {
         String toHost = getHost(toUrl);
         if (toHost == null || toHost.equals(fromHost)) { // internal link
-          continue;                               // skip it
+          continue; // skip it
         }
       }
       if (urlNormalizers != null) {
         try {
-          toUrl = urlNormalizers.normalize(toUrl, URLNormalizers.SCOPE_LINKDB); // normalize the url
+          // normalize the url
+          toUrl = urlNormalizers.normalize(toUrl,
+              URLNormalizers.SCOPE_LINKDB);
         } catch (Exception e) {
           LOG.warn("Skipping " + toUrl + ":" + e);
           toUrl = null;
@@ -124,13 +132,14 @@
           toUrl = null;
         }
       }
-      if (toUrl == null) continue;
+      if (toUrl == null)
+        continue;
       inlinks.clear();
-      String anchor = outlink.getAnchor();        // truncate long anchors
+      String anchor = outlink.getAnchor(); // truncate long anchors
       if (anchor.length() > maxAnchorLength) {
         anchor = anchor.substring(0, maxAnchorLength);
       }
-      inlinks.add(new Inlink(fromUrl, anchor));   // collect inverted link
+      inlinks.add(new Inlink(fromUrl, anchor)); // collect inverted link
       output.collect(new Text(toUrl), inlinks);
     }
   }
@@ -143,13 +152,16 @@
     }
   }
 
-  public void invert(Path linkDb, final Path segmentsDir, boolean normalize, boolean filter, boolean force) throws IOException {
+  public void invert(Path linkDb, final Path segmentsDir, boolean normalize,
+      boolean filter, boolean force) throws IOException {
     final FileSystem fs = FileSystem.get(getConf());
-    FileStatus[] files = fs.listStatus(segmentsDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
+    FileStatus[] files = fs.listStatus(segmentsDir,
+        HadoopFSUtil.getPassDirectoriesFilter(fs));
     invert(linkDb, HadoopFSUtil.getPaths(files), normalize, filter, force);
   }
 
-  public void invert(Path linkDb, Path[] segments, boolean normalize, boolean filter, boolean force) throws IOException {
+  public void invert(Path linkDb, Path[] segments, boolean normalize,
+      boolean filter, boolean force) throws IOException {
     Path lock = new Path(linkDb, LOCK_NAME);
     FileSystem fs = FileSystem.get(getConf());
     LockUtil.createLockFile(fs, lock, force);
@@ -169,7 +181,8 @@
       if (LOG.isInfoEnabled()) {
         LOG.info("LinkDb: adding segment: " + segments[i]);
       }
-      FileInputFormat.addInputPath(job, new Path(segments[i], ParseData.DIR_NAME));
+      FileInputFormat.addInputPath(job, new Path(segments[i],
+          ParseData.DIR_NAME));
     }
     try {
       JobClient.runJob(job);
@@ -198,13 +211,14 @@
     LinkDb.install(job, linkDb);
 
     long end = System.currentTimeMillis();
-    LOG.info("LinkDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDb: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  private static JobConf createJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
-    Path newLinkDb =
-      new Path("linkdb-" +
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  private static JobConf createJob(Configuration config, Path linkDb,
+      boolean normalize, boolean filter) {
+    Path newLinkDb = new Path("linkdb-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("linkdb " + linkDb);
@@ -242,12 +256,14 @@
     Path old = new Path(linkDb, "old");
     Path current = new Path(linkDb, CURRENT_NAME);
     if (fs.exists(current)) {
-      if (fs.exists(old)) fs.delete(old, true);
+      if (fs.exists(old))
+        fs.delete(old, true);
       fs.rename(current, old);
     }
     fs.mkdirs(linkDb);
     fs.rename(newLinkDb, current);
-    if (fs.exists(old)) fs.delete(old, true);
+    if (fs.exists(old))
+      fs.delete(old, true);
     LockUtil.removeLockFile(fs, new Path(linkDb, LOCK_NAME));
   }
 
@@ -258,11 +274,14 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: LinkDb <linkdb> (-dir <segmentsDir> | <seg1> <seg2> ...) [-force] [-noNormalize] [-noFilter]");
+      System.err
+          .println("Usage: LinkDb <linkdb> (-dir <segmentsDir> | <seg1> <seg2> ...) [-force] [-noNormalize] [-noFilter]");
       System.err.println("\tlinkdb\toutput LinkDb to create or update");
-      System.err.println("\t-dir segmentsDir\tparent directory of several segments, OR");
+      System.err
+          .println("\t-dir segmentsDir\tparent directory of several segments, OR");
       System.err.println("\tseg1 seg2 ...\t list of segment directories");
-      System.err.println("\t-force\tforce update even if LinkDb appears to be locked (CAUTION advised)");
+      System.err
+          .println("\t-force\tforce update even if LinkDb appears to be locked (CAUTION advised)");
       System.err.println("\t-noNormalize\tdon't normalize link URLs");
       System.err.println("\t-noFilter\tdon't apply URLFilters to link URLs");
       return -1;
@@ -276,7 +295,8 @@
     boolean force = false;
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-dir")) {
-        FileStatus[] paths = fs.listStatus(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] paths = fs.listStatus(new Path(args[++i]),
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         segs.addAll(Arrays.asList(HadoopFSUtil.getPaths(paths)));
       } else if (args[i].equalsIgnoreCase("-noNormalize")) {
         normalize = false;
@@ -284,7 +304,8 @@
         filter = false;
       } else if (args[i].equalsIgnoreCase("-force")) {
         force = true;
-      } else segs.add(new Path(args[i]));
+      } else
+        segs.add(new Path(args[i]));
     }
     try {
       invert(db, segs.toArray(new Path[segs.size()]), normalize, filter, force);
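
A short sketch of calling the invert() overload reformatted above; the linkdb and segment paths are made up for the example.

// Illustrative sketch, not part of this patch. The linkdb and segment
// paths are hypothetical; the invert() signature follows the hunk above.
import org.apache.hadoop.fs.Path;
import org.apache.nutch.crawl.LinkDb;
import org.apache.nutch.util.NutchConfiguration;

public class LinkDbInvertExample {
  public static void main(String[] args) throws Exception {
    LinkDb linkDb = new LinkDb(NutchConfiguration.create());
    Path db = new Path("crawl/linkdb");
    Path[] segments = { new Path("crawl/segments/20111024123456") };
    // normalize and filter link URLs; do not force-break an existing lock
    linkDb.invert(db, segments, true, true, false);
  }
}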
Index: src/java/org/apache/nutch/crawl/CrawlDatum.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDatum.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDatum.java	(working copy)
@@ -41,51 +41,50 @@
   private static final byte OLD_STATUS_FETCH_SUCCESS = 5;
   private static final byte OLD_STATUS_FETCH_RETRY = 6;
   private static final byte OLD_STATUS_FETCH_GONE = 7;
-  
+
   private static HashMap<Byte, Byte> oldToNew = new HashMap<Byte, Byte>();
-  
+
   /** Page was not fetched yet. */
-  public static final byte STATUS_DB_UNFETCHED      = 0x01;
+  public static final byte STATUS_DB_UNFETCHED = 0x01;
   /** Page was successfully fetched. */
-  public static final byte STATUS_DB_FETCHED        = 0x02;
+  public static final byte STATUS_DB_FETCHED = 0x02;
   /** Page no longer exists. */
-  public static final byte STATUS_DB_GONE           = 0x03;
+  public static final byte STATUS_DB_GONE = 0x03;
   /** Page temporarily redirects to other page. */
-  public static final byte STATUS_DB_REDIR_TEMP     = 0x04;
+  public static final byte STATUS_DB_REDIR_TEMP = 0x04;
   /** Page permanently redirects to other page. */
-  public static final byte STATUS_DB_REDIR_PERM     = 0x05;
+  public static final byte STATUS_DB_REDIR_PERM = 0x05;
   /** Page was successfully fetched and found not modified. */
-  public static final byte STATUS_DB_NOTMODIFIED    = 0x06;
-  
+  public static final byte STATUS_DB_NOTMODIFIED = 0x06;
+
   /** Maximum value of DB-related status. */
-  public static final byte STATUS_DB_MAX            = 0x1f;
-  
+  public static final byte STATUS_DB_MAX = 0x1f;
+
   /** Fetching was successful. */
-  public static final byte STATUS_FETCH_SUCCESS     = 0x21;
+  public static final byte STATUS_FETCH_SUCCESS = 0x21;
   /** Fetching unsuccessful, needs to be retried (transient errors). */
-  public static final byte STATUS_FETCH_RETRY       = 0x22;
+  public static final byte STATUS_FETCH_RETRY = 0x22;
   /** Fetching temporarily redirected to other page. */
-  public static final byte STATUS_FETCH_REDIR_TEMP  = 0x23;
+  public static final byte STATUS_FETCH_REDIR_TEMP = 0x23;
   /** Fetching permanently redirected to other page. */
-  public static final byte STATUS_FETCH_REDIR_PERM  = 0x24;
+  public static final byte STATUS_FETCH_REDIR_PERM = 0x24;
   /** Fetching unsuccessful - page is gone. */
-  public static final byte STATUS_FETCH_GONE        = 0x25;
+  public static final byte STATUS_FETCH_GONE = 0x25;
   /** Fetching successful - page is not modified. */
   public static final byte STATUS_FETCH_NOTMODIFIED = 0x26;
-  
+
   /** Maximum value of fetch-related status. */
-  public static final byte STATUS_FETCH_MAX         = 0x3f;
-  
+  public static final byte STATUS_FETCH_MAX = 0x3f;
+
   /** Page signature. */
-  public static final byte STATUS_SIGNATURE         = 0x41;
+  public static final byte STATUS_SIGNATURE = 0x41;
   /** Page was newly injected. */
-  public static final byte STATUS_INJECTED          = 0x42;
+  public static final byte STATUS_INJECTED = 0x42;
   /** Page discovered through a link. */
-  public static final byte STATUS_LINKED            = 0x43;
+  public static final byte STATUS_LINKED = 0x43;
   /** Page got metadata from a parser */
-  public static final byte STATUS_PARSE_META        = 0x44;
-  
-  
+  public static final byte STATUS_PARSE_META = 0x44;
+
   public static final HashMap<Byte, String> statNames = new HashMap<Byte, String>();
   static {
     statNames.put(STATUS_DB_UNFETCHED, "db_unfetched");
@@ -104,7 +103,7 @@
     statNames.put(STATUS_FETCH_GONE, "fetch_gone");
     statNames.put(STATUS_FETCH_NOTMODIFIED, "fetch_notmodified");
     statNames.put(STATUS_PARSE_META, "parse_metadata");
-    
+
     oldToNew.put(OLD_STATUS_DB_UNFETCHED, STATUS_DB_UNFETCHED);
     oldToNew.put(OLD_STATUS_DB_FETCHED, STATUS_DB_FETCHED);
     oldToNew.put(OLD_STATUS_DB_GONE, STATUS_DB_GONE);
@@ -123,22 +122,25 @@
   private byte[] signature = null;
   private long modifiedTime;
   private org.apache.hadoop.io.MapWritable metaData;
-  
+
   public static boolean hasDbStatus(CrawlDatum datum) {
-    if (datum.status <= STATUS_DB_MAX) return true;
+    if (datum.status <= STATUS_DB_MAX)
+      return true;
     return false;
   }
 
   public static boolean hasFetchStatus(CrawlDatum datum) {
-    if (datum.status > STATUS_DB_MAX && datum.status <= STATUS_FETCH_MAX) return true;
+    if (datum.status > STATUS_DB_MAX && datum.status <= STATUS_FETCH_MAX)
+      return true;
     return false;
   }
 
-  public CrawlDatum() { }
+  public CrawlDatum() {
+  }
 
   public CrawlDatum(int status, int fetchInterval) {
     this();
-    this.status = (byte)status;
+    this.status = (byte) status;
     this.fetchInterval = fetchInterval;
   }
 
@@ -151,26 +153,36 @@
   // accessor methods
   //
 
-  public byte getStatus() { return status; }
-  
+  public byte getStatus() {
+    return status;
+  }
+
   public static String getStatusName(byte value) {
     String res = statNames.get(value);
-    if (res == null) res = "unknown";
+    if (res == null)
+      res = "unknown";
     return res;
   }
-  
-  public void setStatus(int status) { this.status = (byte)status; }
 
+  public void setStatus(int status) {
+    this.status = (byte) status;
+  }
+
   /**
    * Returns either the time of the last fetch, or the next fetch time,
    * depending on whether Fetcher or CrawlDbReducer set the time.
    */
-  public long getFetchTime() { return fetchTime; }
+  public long getFetchTime() {
+    return fetchTime;
+  }
+
   /**
-   * Sets either the time of the last fetch or the next fetch time,
-   * depending on whether Fetcher or CrawlDbReducer set the time.
+   * Sets either the time of the last fetch or the next fetch time, depending on
+   * whether Fetcher or CrawlDbReducer set the time.
    */
-  public void setFetchTime(long fetchTime) { this.fetchTime = fetchTime; }
+  public void setFetchTime(long fetchTime) {
+    this.fetchTime = fetchTime;
+  }
 
   public long getModifiedTime() {
     return modifiedTime;
@@ -179,54 +191,72 @@
   public void setModifiedTime(long modifiedTime) {
     this.modifiedTime = modifiedTime;
   }
-  
-  public byte getRetriesSinceFetch() { return retries; }
-  public void setRetriesSinceFetch(int retries) {this.retries = (byte)retries;}
 
-  public int getFetchInterval() { return fetchInterval; }
+  public byte getRetriesSinceFetch() {
+    return retries;
+  }
+
+  public void setRetriesSinceFetch(int retries) {
+    this.retries = (byte) retries;
+  }
+
+  public int getFetchInterval() {
+    return fetchInterval;
+  }
+
   public void setFetchInterval(int fetchInterval) {
     this.fetchInterval = fetchInterval;
   }
+
   public void setFetchInterval(float fetchInterval) {
     this.fetchInterval = Math.round(fetchInterval);
   }
 
-  public float getScore() { return score; }
-  public void setScore(float score) { this.score = score; }
+  public float getScore() {
+    return score;
+  }
 
+  public void setScore(float score) {
+    this.score = score;
+  }
+
   public byte[] getSignature() {
     return signature;
   }
 
   public void setSignature(byte[] signature) {
     if (signature != null && signature.length > 256)
-      throw new RuntimeException("Max signature length (256) exceeded: " + signature.length);
+      throw new RuntimeException("Max signature length (256) exceeded: "
+          + signature.length);
     this.signature = signature;
   }
-  
-   public void setMetaData(org.apache.hadoop.io.MapWritable mapWritable) {
-     this.metaData = new org.apache.hadoop.io.MapWritable(mapWritable);
-   }
-   
-   /** Add all metadata from other CrawlDatum to this CrawlDatum.
-    * 
-    * @param other CrawlDatum
-    */
-   public void putAllMetaData(CrawlDatum other) {
-     for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
-       getMetaData().put(e.getKey(), e.getValue());
-     }
-   }
 
+  public void setMetaData(org.apache.hadoop.io.MapWritable mapWritable) {
+    this.metaData = new org.apache.hadoop.io.MapWritable(mapWritable);
+  }
+
   /**
-   * returns a MapWritable if it was set or read in @see readFields(DataInput), 
-   * returns empty map in case CrawlDatum was freshly created (lazily instantiated).
+   * Add all metadata from other CrawlDatum to this CrawlDatum.
+   * 
+   * @param other
+   *          CrawlDatum
    */
+  public void putAllMetaData(CrawlDatum other) {
+    for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
+      getMetaData().put(e.getKey(), e.getValue());
+    }
+  }
+
+  /**
+   * Returns a MapWritable if it was set or read in
+   * {@link #readFields(DataInput)}; returns an empty map in case the
+   * CrawlDatum was freshly created (lazily instantiated).
+   */
   public org.apache.hadoop.io.MapWritable getMetaData() {
-    if (this.metaData == null) this.metaData = new org.apache.hadoop.io.MapWritable();
+    if (this.metaData == null)
+      this.metaData = new org.apache.hadoop.io.MapWritable();
     return this.metaData;
   }
-  
 
   //
   // writable methods
@@ -239,8 +269,8 @@
   }
 
   public void readFields(DataInput in) throws IOException {
-    byte version = in.readByte();                 // read version
-    if (version > CUR_VERSION)                   // check version
+    byte version = in.readByte(); // read version
+    if (version > CUR_VERSION) // check version
       throw new VersionMismatchException(CUR_VERSION, version);
 
     status = in.readByte();
@@ -248,7 +278,8 @@
     retries = in.readByte();
     if (version > 5) {
       fetchInterval = in.readInt();
-    } else fetchInterval = Math.round(in.readFloat());
+    } else
+      fetchInterval = Math.round(in.readFloat());
     score = in.readFloat();
     if (version > 2) {
       modifiedTime = in.readLong();
@@ -256,9 +287,10 @@
       if (cnt > 0) {
         signature = new byte[cnt];
         in.readFully(signature);
-      } else signature = null;
+      } else
+        signature = null;
     }
-    
+
     if (version > 3) {
       boolean hasMetadata = false;
       if (version < 7) {
@@ -278,7 +310,8 @@
           metaData.readFields(in);
         }
       }
-      if (hasMetadata==false) metaData = null;
+      if (hasMetadata == false)
+        metaData = null;
     }
     // translate status codes
     if (version < 5) {
@@ -286,7 +319,7 @@
         status = oldToNew.get(status);
       else
         status = STATUS_DB_UNFETCHED;
-      
+
     }
   }
 
@@ -295,7 +328,7 @@
   private static final int SIG_OFFSET = SCORE_OFFSET + 4 + 8;
 
   public void write(DataOutput out) throws IOException {
-    out.writeByte(CUR_VERSION);                   // store current version
+    out.writeByte(CUR_VERSION); // store current version
     out.writeByte(status);
     out.writeLong(fetchTime);
     out.writeByte(retries);
@@ -326,17 +359,19 @@
     this.modifiedTime = that.modifiedTime;
     this.signature = that.signature;
     if (that.metaData != null) {
-      this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData); // make a deep copy
+      // make a deep copy
+      this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData);
     } else {
       this.metaData = null;
     }
   }
 
-
   //
   // compare methods
   //
-  
+
   /** Sort by decreasing score. */
   public int compareTo(CrawlDatum that) {
     if (that.score != this.score)
@@ -354,47 +389,49 @@
     return SignatureComparator._compare(this, that);
   }
 
-  /** A Comparator optimized for CrawlDatum. */ 
+  /** A Comparator optimized for CrawlDatum. */
   public static class Comparator extends WritableComparator {
-    public Comparator() { super(CrawlDatum.class); }
+    public Comparator() {
+      super(CrawlDatum.class);
+    }
 
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-      float score1 = readFloat(b1,s1+SCORE_OFFSET);
-      float score2 = readFloat(b2,s2+SCORE_OFFSET);
+      float score1 = readFloat(b1, s1 + SCORE_OFFSET);
+      float score2 = readFloat(b2, s2 + SCORE_OFFSET);
       if (score2 != score1) {
         return (score2 - score1) > 0 ? 1 : -1;
       }
-      int status1 = b1[s1+1];
-      int status2 = b2[s2+1];
+      int status1 = b1[s1 + 1];
+      int status2 = b2[s2 + 1];
       if (status2 != status1)
         return status1 - status2;
-      long fetchTime1 = readLong(b1, s1+1+1);
-      long fetchTime2 = readLong(b2, s2+1+1);
+      long fetchTime1 = readLong(b1, s1 + 1 + 1);
+      long fetchTime2 = readLong(b2, s2 + 1 + 1);
       if (fetchTime2 != fetchTime1)
         return (fetchTime2 - fetchTime1) > 0 ? 1 : -1;
-      int retries1 = b1[s1+1+1+8];
-      int retries2 = b2[s2+1+1+8];
+      int retries1 = b1[s1 + 1 + 1 + 8];
+      int retries2 = b2[s2 + 1 + 1 + 8];
       if (retries2 != retries1)
         return retries2 - retries1;
-      int fetchInterval1 = readInt(b1, s1+1+1+8+1);
-      int fetchInterval2 = readInt(b2, s2+1+1+8+1);
+      int fetchInterval1 = readInt(b1, s1 + 1 + 1 + 8 + 1);
+      int fetchInterval2 = readInt(b2, s2 + 1 + 1 + 8 + 1);
       if (fetchInterval2 != fetchInterval1)
         return (fetchInterval2 - fetchInterval1) > 0 ? 1 : -1;
       long modifiedTime1 = readLong(b1, s1 + SCORE_OFFSET + 4);
       long modifiedTime2 = readLong(b2, s2 + SCORE_OFFSET + 4);
       if (modifiedTime2 != modifiedTime1)
         return (modifiedTime2 - modifiedTime1) > 0 ? 1 : -1;
-      int sigl1 = b1[s1+SIG_OFFSET];
-      int sigl2 = b2[s2+SIG_OFFSET];
-      return SignatureComparator._compare(b1, SIG_OFFSET, sigl1, b2, SIG_OFFSET, sigl2);
+      int sigl1 = b1[s1 + SIG_OFFSET];
+      int sigl2 = b2[s2 + SIG_OFFSET];
+      return SignatureComparator._compare(b1, SIG_OFFSET, sigl1, b2,
+          SIG_OFFSET, sigl2);
     }
   }
 
-  static {                                        // register this comparator
+  static { // register this comparator
     WritableComparator.define(CrawlDatum.class, new Comparator());
   }
 
-
   //
   // basic methods
   //
@@ -402,12 +439,13 @@
   public String toString() {
     StringBuilder buf = new StringBuilder();
     buf.append("Version: " + CUR_VERSION + "\n");
-    buf.append("Status: " + getStatus() + " (" + getStatusName(getStatus()) + ")\n");
+    buf.append("Status: " + getStatus() + " (" + getStatusName(getStatus())
+        + ")\n");
     buf.append("Fetch time: " + new Date(getFetchTime()) + "\n");
     buf.append("Modified time: " + new Date(getModifiedTime()) + "\n");
     buf.append("Retries since fetch: " + getRetriesSinceFetch() + "\n");
-    buf.append("Retry interval: " + getFetchInterval() + " seconds (" +
-        (getFetchInterval() / FetchSchedule.SECONDS_PER_DAY) + " days)\n");
+    buf.append("Retry interval: " + getFetchInterval() + " seconds ("
+        + (getFetchInterval() / FetchSchedule.SECONDS_PER_DAY) + " days)\n");
     buf.append("Score: " + getScore() + "\n");
     buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
     buf.append("Metadata: ");
@@ -421,35 +459,35 @@
     buf.append('\n');
     return buf.toString();
   }
-  
+
   private boolean metadataEquals(org.apache.hadoop.io.MapWritable otherMetaData) {
-    if (metaData==null || metaData.size() ==0) {
+    if (metaData == null || metaData.size() == 0) {
       return otherMetaData == null || otherMetaData.size() == 0;
     }
     if (otherMetaData == null) {
       // we already know that the current object is not null or empty
       return false;
     }
-    HashSet<Entry<Writable, Writable>> set1 =
-      new HashSet<Entry<Writable,Writable>>(metaData.entrySet());
-    HashSet<Entry<Writable, Writable>> set2 =
-      new HashSet<Entry<Writable,Writable>>(otherMetaData.entrySet());
+    HashSet<Entry<Writable, Writable>> set1 = new HashSet<Entry<Writable, Writable>>(
+        metaData.entrySet());
+    HashSet<Entry<Writable, Writable>> set2 = new HashSet<Entry<Writable, Writable>>(
+        otherMetaData.entrySet());
     return set1.equals(set2);
   }
 
   public boolean equals(Object o) {
     if (!(o instanceof CrawlDatum))
       return false;
-    CrawlDatum other = (CrawlDatum)o;
-    boolean res =
-      (this.status == other.status) &&
-      (this.fetchTime == other.fetchTime) &&
-      (this.modifiedTime == other.modifiedTime) &&
-      (this.retries == other.retries) &&
-      (this.fetchInterval == other.fetchInterval) &&
-      (SignatureComparator._compare(this.signature, other.signature) == 0) &&
-      (this.score == other.score);
-    if (!res) return res;
+    CrawlDatum other = (CrawlDatum) o;
+    boolean res = (this.status == other.status)
+        && (this.fetchTime == other.fetchTime)
+        && (this.modifiedTime == other.modifiedTime)
+        && (this.retries == other.retries)
+        && (this.fetchInterval == other.fetchInterval)
+        && (SignatureComparator._compare(this.signature, other.signature) == 0)
+        && (this.score == other.score);
+    if (!res)
+      return res;
     return metadataEquals(other.metaData);
   }
 
@@ -457,20 +495,14 @@
     int res = 0;
     if (signature != null) {
       for (int i = 0; i < signature.length / 4; i += 4) {
-        res ^= (int)(signature[i] << 24 + signature[i+1] << 16 +
-                signature[i+2] << 8 + signature[i+3]);
+        res ^= (int) (signature[i] << 24 + signature[i + 1] << 16 + signature[i + 2] << 8 + signature[i + 3]);
       }
     }
     if (metaData != null) {
       res ^= metaData.entrySet().hashCode();
     }
-    return
-      res ^ status ^
-      ((int)fetchTime) ^
-      ((int)modifiedTime) ^
-      retries ^
-      fetchInterval ^
-      Float.floatToIntBits(score);
+    return res ^ status ^ ((int) fetchTime) ^ ((int) modifiedTime) ^ retries
+        ^ fetchInterval ^ Float.floatToIntBits(score);
   }
 
   public Object clone() {
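
To make the reformatted constants and accessors above concrete, a small sketch that builds a CrawlDatum and exercises the status helpers; the values are arbitrary.

// Illustrative sketch, not part of this patch. Values are arbitrary;
// the constants and accessors are those shown in the hunks above.
import org.apache.nutch.crawl.CrawlDatum;

public class CrawlDatumExample {
  public static void main(String[] args) {
    // db_unfetched, re-fetch interval of 30 days (in seconds)
    CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
        30 * 24 * 3600);
    datum.setFetchTime(System.currentTimeMillis());
    datum.setScore(1.0f);
    System.out.println(CrawlDatum.getStatusName(datum.getStatus())); // db_unfetched
    System.out.println(CrawlDatum.hasDbStatus(datum));    // true
    System.out.println(CrawlDatum.hasFetchStatus(datum)); // false
    System.out.println(datum); // multi-line dump via toString()
  }
}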
Index: src/java/org/apache/nutch/crawl/LinkDbMerger.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbMerger.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDbMerger.java	(working copy)
@@ -46,37 +46,44 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This tool merges several LinkDb-s into one, optionally filtering
- * URLs through the current URLFilters, to skip prohibited URLs and
- * links.
+ * This tool merges several LinkDb-s into one, optionally filtering URLs through
+ * the current URLFilters, to skip prohibited URLs and links.
  * 
- * <p>It's possible to use this tool just for filtering - in that case
- * only one LinkDb should be specified in arguments.</p>
- * <p>If more than one LinkDb contains information about the same URL,
- * all inlinks are accumulated, but only at most <code>db.max.inlinks</code>
- * inlinks will ever be added.</p>
- * <p>If activated, URLFilters will be applied to both the target URLs and
- * to any incoming link URL. If a target URL is prohibited, all
- * inlinks to that target will be removed, including the target URL. If
- * some of incoming links are prohibited, only they will be removed, and they
- * won't count when checking the above-mentioned maximum limit.
+ * <p>
+ * It's possible to use this tool just for filtering - in that case only one
+ * LinkDb should be specified in arguments.
+ * </p>
+ * <p>
+ * If more than one LinkDb contains information about the same URL, all inlinks
+ * are accumulated, but only at most <code>db.max.inlinks</code> inlinks will
+ * ever be added.
+ * </p>
+ * <p>
+ * If activated, URLFilters will be applied to both the target URLs and to any
+ * incoming link URL. If a target URL is prohibited, all inlinks to that target
+ * will be removed, including the target URL. If some of the incoming links are
+ * prohibited, only those will be removed, and they won't count when checking
+ * the above-mentioned maximum limit.
  * 
  * @author Andrzej Bialecki
  */
-public class LinkDbMerger extends Configured implements Tool, Reducer<Text, Inlinks, Text, Inlinks> {
+public class LinkDbMerger extends Configured implements Tool,
+    Reducer<Text, Inlinks, Text, Inlinks> {
   private static final Logger LOG = LoggerFactory.getLogger(LinkDbMerger.class);
-  
+
   private int maxInlinks;
-  
+
   public LinkDbMerger() {
-    
+
   }
-  
+
   public LinkDbMerger(Configuration conf) {
     setConf(conf);
   }
 
-  public void reduce(Text key, Iterator<Inlinks> values, OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {
+  public void reduce(Text key, Iterator<Inlinks> values,
+      OutputCollector<Text, Inlinks> output, Reporter reporter)
+      throws IOException {
 
     Inlinks result = new Inlinks();
 
@@ -86,43 +93,48 @@
       int end = Math.min(maxInlinks - result.size(), inlinks.size());
       Iterator<Inlink> it = inlinks.iterator();
       int i = 0;
-      while(it.hasNext() && i++ < end) {
+      while (it.hasNext() && i++ < end) {
         result.add(it.next());
       }
     }
-    if (result.size() == 0) return;
+    if (result.size() == 0)
+      return;
     output.collect(key, result);
-    
+
   }
 
   public void configure(JobConf job) {
     maxInlinks = job.getInt("db.max.inlinks", 10000);
   }
 
-  public void close() throws IOException { }
+  public void close() throws IOException {
+  }
 
-  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
+  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
+      throws Exception {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("LinkDb merge: starting at " + sdf.format(start));
 
     JobConf job = createMergeJob(getConf(), output, normalize, filter);
     for (int i = 0; i < dbs.length; i++) {
-      FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));      
+      FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
     }
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
-    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, LinkDb.CURRENT_NAME));
+    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
+        LinkDb.CURRENT_NAME));
 
     long end = System.currentTimeMillis();
-    LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static JobConf createMergeJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
-    Path newLinkDb =
-      new Path("linkdb-merge-" + 
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  public static JobConf createMergeJob(Configuration config, Path linkDb,
+      boolean normalize, boolean filter) {
+    Path newLinkDb = new Path("linkdb-merge-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("linkdb merge " + linkDb);
@@ -145,22 +157,27 @@
 
     return job;
   }
-  
+
   /**
    * @param args
    */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbMerger(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbMerger(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: LinkDbMerger <output_linkdb> <linkdb1> [<linkdb2> <linkdb3> ...] [-normalize] [-filter]");
+      System.err
+          .println("Usage: LinkDbMerger <output_linkdb> <linkdb1> [<linkdb2> <linkdb3> ...] [-normalize] [-filter]");
       System.err.println("\toutput_linkdb\toutput LinkDb");
-      System.err.println("\tlinkdb1 ...\tinput LinkDb-s (single input LinkDb is ok)");
-      System.err.println("\t-normalize\tuse URLNormalizer on both fromUrls and toUrls in linkdb(s) (usually not needed)");
-      System.err.println("\t-filter\tuse URLFilters on both fromUrls and toUrls in linkdb(s)");
+      System.err
+          .println("\tlinkdb1 ...\tinput LinkDb-s (single input LinkDb is ok)");
+      System.err
+          .println("\t-normalize\tuse URLNormalizer on both fromUrls and toUrls in linkdb(s) (usually not needed)");
+      System.err
+          .println("\t-filter\tuse URLFilters on both fromUrls and toUrls in linkdb(s)");
       return -1;
     }
     Path output = new Path(args[0]);
@@ -172,7 +189,8 @@
         filter = true;
       } else if (args[i].equals("-normalize")) {
         normalize = true;
-      } else dbs.add(new Path(args[i]));
+      } else
+        dbs.add(new Path(args[i]));
     }
     try {
       merge(output, dbs.toArray(new Path[dbs.size()]), normalize, filter);
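
A minimal sketch of invoking the merge tool described above through ToolRunner, mirroring its own main(); the linkdb paths are hypothetical.

// Illustrative sketch, not part of this patch. Paths are hypothetical;
// the invocation mirrors LinkDbMerger.main() shown above.
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.LinkDbMerger;
import org.apache.nutch.util.NutchConfiguration;

public class LinkDbMergerExample {
  public static void main(String[] args) throws Exception {
    // merge two linkdbs, applying URLFilters to both fromUrls and toUrls
    String[] toolArgs = { "crawl/linkdb-merged", "crawl/linkdb1",
        "crawl/linkdb2", "-filter" };
    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbMerger(),
        toolArgs);
    System.exit(res);
  }
}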
Index: src/java/org/apache/nutch/crawl/FetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/FetchSchedule.java	(working copy)
@@ -21,134 +21,184 @@
 import org.apache.hadoop.io.Text;
 
 /**
- * This interface defines the contract for implementations that manipulate
- * fetch times and re-fetch intervals.
+ * This interface defines the contract for implementations that manipulate fetch
+ * times and re-fetch intervals.
  * 
  * @author Andrzej Bialecki
  */
 public interface FetchSchedule extends Configurable {
-  
+
   /** It is unknown whether page was changed since our last visit. */
-  public static final int STATUS_UNKNOWN       = 0;
+  public static final int STATUS_UNKNOWN = 0;
   /** Page is known to have been modified since our last visit. */
-  public static final int STATUS_MODIFIED      = 1;
+  public static final int STATUS_MODIFIED = 1;
   /** Page is known to remain unmodified since our last visit. */
-  public static final int STATUS_NOTMODIFIED    = 2;
-  
+  public static final int STATUS_NOTMODIFIED = 2;
+
   public static final int SECONDS_PER_DAY = 3600 * 24;
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation set the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
    * 
-   * @param url URL of the page.
-   * @param datum datum instance to be initialized.
+   * @param url
+   *          URL of the page.
+   * @param datum
+   *          datum instance to be initialized.
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum initializeSchedule(Text url, CrawlDatum datum);
-  
+
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page.
-   * Implementations may use supplied arguments to support different re-fetching
-   * schedules.
+   * successfully fetched page. Implementations may use supplied arguments to
+   * support different re-fetching schedules.
    * 
-   * @param url url of the page
-   * @param datum page description to be adjusted. NOTE: this instance, passed by reference,
-   * may be modified inside the method.
-   * @param prevFetchTime previous value of fetch time, or 0 if not available
-   * @param prevModifiedTime previous value of modifiedTime, or 0 if not available
-   * @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
-   * implementations should update the value in {@param datum} to something greater than this value.
-   * @param modifiedTime last time the content was modified. This information comes from
-   * the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
-   * implementations should update the value in {@param datum} to this value.
-   * @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
-   * <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
-   * This information may be obtained by comparing page signatures before and after fetching. If this
-   * is set to {@link #STATUS_UNKNOWN}, then it is unknown whether the page was changed; implementations
-   * are free to follow a sensible default behavior.
-   * @return adjusted page information, including all original information. NOTE: this may
-   * be a different instance than {@param datum}, but implementations should make sure that
-   * it contains at least all information from {@param datum}.
+   * @param url
+   *          url of the page
+   * @param datum
+   *          page description to be adjusted. NOTE: this instance, passed by
+   *          reference, may be modified inside the method.
+   * @param prevFetchTime
+   *          previous value of fetch time, or 0 if not available
+   * @param prevModifiedTime
+   *          previous value of modifiedTime, or 0 if not available
+   * @param fetchTime
+   *          the latest time, when the page was recently re-fetched. Most
+   *          FetchSchedule implementations should update the value in
+   *          {@code datum} to something greater than this value.
+   * @param modifiedTime
+   *          last time the content was modified. This information comes from
+   *          the protocol implementations, or is set to < 0 if not available.
+   *          Most FetchSchedule implementations should update the value in
+   *          {@code datum} to this value.
+   * @param state
+   *          if {@link #STATUS_MODIFIED}, then the content is considered to be
+   *          "changed" before the <code>fetchTime</code>, if
+   *          {@link #STATUS_NOTMODIFIED} then the content is known to be
+   *          unchanged. This information may be obtained by comparing page
+   *          signatures before and after fetching. If this is set to
+   *          {@link #STATUS_UNKNOWN}, then it is unknown whether the page was
+   *          changed; implementations are free to follow a sensible default
+   *          behavior.
+   * @return adjusted page information, including all original information.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state);
-  
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state);
+
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
-   * @param datum datum instance to be adjusted
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance to be adjusted
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime);
-  
+      long prevFetchTime, long prevModifiedTime, long fetchTime);
+
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases the
-   * retry counter.
-   * @param url URL of the page
-   * @param datum page information
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          page information
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime);
-  
+      long prevFetchTime, long prevModifiedTime, long fetchTime);
+
   /**
    * Calculates last fetch time of the given CrawlDatum.
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(CrawlDatum datum);
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information whether the page is suitable for selection
+   * in the current fetchlist. NOTE: a true return value does not guarantee that
+   * the page will be fetched, it just allows it to be included in the further
+   * selection process based on scores. The default implementation checks
+   * <code>fetchTime</code>: if it is higher than {@code curTime} it returns
+   * false, and true otherwise. It will also check that fetchTime is not too
+   * remote (more than <code>maxInterval</code>), in which case it lowers the
+   * interval and returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(Text url, CrawlDatum datum, long curTime);
-  
+
   /**
-   * This method resets fetchTime, fetchInterval, modifiedTime and
-   * page signature, so that it forces refetching.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * This method resets fetchTime, fetchInterval, modifiedTime and page
+   * signature, so that it forces refetching.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap);
 }
Index: src/java/org/apache/nutch/crawl/MD5Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/MD5Signature.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/MD5Signature.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.protocol.Content;
 
 /**
- * Default implementation of a page signature. It calculates an MD5 hash
- * of the raw binary content of a page. In case there is no content, it
- * calculates a hash from the page's URL.
+ * Default implementation of a page signature. It calculates an MD5 hash of the
+ * raw binary content of a page. In case there is no content, it calculates a
+ * hash from the page's URL.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
@@ -32,7 +32,8 @@
 
   public byte[] calculate(Content content, Parse parse) {
     byte[] data = content.getContent();
-    if (data == null) data = content.getUrl().getBytes();
+    if (data == null)
+      data = content.getUrl().getBytes();
     return MD5Hash.digest(data).getDigest();
   }
 }
Index: src/java/org/apache/nutch/crawl/MapWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/MapWritable.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/MapWritable.java	(working copy)
@@ -47,15 +47,15 @@
 import org.apache.nutch.protocol.ProtocolStatus;
 
 /**
- * A writable map, with a similar behavior as <code>java.util.HashMap</code>.
- * In addition to the size of key and value writable tuple two additional bytes
- * are stored to identify the Writable classes. This means that a maximum of
- * 255 different class types can be used for key and value objects.
- * A binary-id to class mapping is defined in a static block of this class.
- * However it is possible to use custom implementations of Writable.
- * For these custom Writables we write the byte id - utf class name tuple
- * into the header of each MapWritable that uses these types.
- *
+ * A writable map, with a similar behavior as <code>java.util.HashMap</code>. In
+ * addition to the size of key and value writable tuple two additional bytes are
+ * stored to identify the Writable classes. This means that a maximum of 255
+ * different class types can be used for key and value objects. A binary-id to
+ * class mapping is defined in a static block of this class. However it is
+ * possible to use custom implementations of Writable. For these custom
+ * Writables we write the byte id - utf class name tuple into the header of each
+ * MapWritable that uses these types.
+ * 
  * @author Stefan Groschupf
  * @deprecated Use org.apache.hadoop.io.MapWritable instead.
  */
@@ -105,14 +105,16 @@
     CLASS_ID_MAP.put(clazz, byteId);
     ID_CLASS_MAP.put(byteId, clazz);
   }
-  
-  public MapWritable() { }
-  
+
+  public MapWritable() {
+  }
+
   /**
    * Copy constructor. This constructor makes a deep copy, using serialization /
    * deserialization to break any possible references to contained objects.
    * 
-   * @param map map to copy from
+   * @param map
+   *          map to copy from
    */
   public MapWritable(MapWritable map) {
     if (map != null) {
@@ -123,8 +125,8 @@
         dib.reset(dob.getData(), dob.getLength());
         readFields(dib);
       } catch (IOException e) {
-        throw new IllegalArgumentException("this map cannot be copied: " +
-                StringUtils.stringifyException(e));
+        throw new IllegalArgumentException("this map cannot be copied: "
+            + StringUtils.stringifyException(e));
       }
     }
   }
@@ -177,7 +179,8 @@
 
   public Set<Writable> keySet() {
     HashSet<Writable> set = new HashSet<Writable>();
-    if (isEmpty()) return set;
+    if (isEmpty())
+      return set;
     set.add(fFirst.fKey);
     KeyValueEntry entry = fFirst;
     while ((entry = entry.fNextEntry) != null) {
@@ -257,7 +260,8 @@
   public boolean equals(Object obj) {
     if (obj instanceof MapWritable) {
       MapWritable map = (MapWritable) obj;
-      if (fSize != map.fSize) return false;
+      if (fSize != map.fSize)
+        return false;
       HashSet<KeyValueEntry> set1 = new HashSet<KeyValueEntry>();
       KeyValueEntry e1 = fFirst;
       while (e1 != null) {
@@ -345,7 +349,7 @@
           clazz = Class.forName(Text.readString(in));
           addIdEntry(id, clazz);
         } catch (Exception e) {
-          if (LOG.isWarnEnabled()) { 
+          if (LOG.isWarnEnabled()) {
             LOG.warn("Unable to load internal map entry" + e.toString());
           }
           fIdCount--;
@@ -364,8 +368,8 @@
           }
         } catch (IOException e) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Unable to load meta data entry, ignoring.. : "  +
-                     e.toString());
+            LOG.warn("Unable to load meta data entry, ignoring.. : "
+                + e.toString());
           }
           fSize--;
         }
Index: src/java/org/apache/nutch/crawl/Inlinks.java
===================================================================
--- src/java/org/apache/nutch/crawl/Inlinks.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Inlinks.java	(working copy)
@@ -27,18 +27,26 @@
 public class Inlinks implements Writable {
   private HashSet<Inlink> inlinks = new HashSet<Inlink>(1);
 
-  public void add(Inlink inlink) { inlinks.add(inlink); }
+  public void add(Inlink inlink) {
+    inlinks.add(inlink);
+  }
 
-  public void add(Inlinks inlinks) { this.inlinks.addAll(inlinks.inlinks); }
+  public void add(Inlinks inlinks) {
+    this.inlinks.addAll(inlinks.inlinks);
+  }
 
   public Iterator<Inlink> iterator() {
     return this.inlinks.iterator();
   }
-  
-  public int size() { return inlinks.size(); }
 
-  public void clear() { inlinks.clear(); }
+  public int size() {
+    return inlinks.size();
+  }
 
+  public void clear() {
+    inlinks.clear();
+  }
+
   public void readFields(DataInput in) throws IOException {
     int length = in.readInt();
     inlinks.clear();
@@ -67,30 +75,32 @@
     return buffer.toString();
   }
 
-  /** Return the set of anchor texts.  Only a single anchor with a given text
-   * is permitted from a given domain. */
+  /**
+   * Return the set of anchor texts. Only a single anchor with a given text is
+   * permitted from a given domain.
+   */
   public String[] getAnchors() {
-    HashMap<String, Set<String>> domainToAnchors =
-      new HashMap<String, Set<String>>();
+    HashMap<String, Set<String>> domainToAnchors = new HashMap<String, Set<String>>();
     ArrayList<String> results = new ArrayList<String>();
     Iterator<Inlink> it = inlinks.iterator();
     while (it.hasNext()) {
       Inlink inlink = it.next();
       String anchor = inlink.getAnchor();
 
-      if (anchor.length() == 0)                   // skip empty anchors
+      if (anchor.length() == 0) // skip empty anchors
         continue;
-      String domain = null;                       // extract domain name
+      String domain = null; // extract domain name
       try {
         domain = new URL(inlink.getFromUrl()).getHost();
-      } catch (MalformedURLException e) {}
+      } catch (MalformedURLException e) {
+      }
       Set<String> domainAnchors = domainToAnchors.get(domain);
       if (domainAnchors == null) {
         domainAnchors = new HashSet<String>();
         domainToAnchors.put(domain, domainAnchors);
       }
-      if (domainAnchors.add(anchor)) {            // new anchor from domain
-        results.add(anchor);                      // collect it
+      if (domainAnchors.add(anchor)) { // new anchor from domain
+        results.add(anchor); // collect it
       }
     }
 
Index: src/java/org/apache/nutch/crawl/Crawl.java
===================================================================
--- src/java/org/apache/nutch/crawl/Crawl.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Crawl.java	(working copy)
@@ -43,24 +43,26 @@
   public static final Logger LOG = LoggerFactory.getLogger(Crawl.class);
 
   private static String getDate() {
-    return new SimpleDateFormat("yyyyMMddHHmmss").format
-      (new Date(System.currentTimeMillis()));
+    return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date(System
+        .currentTimeMillis()));
   }
 
-
-  /* Perform complete crawling and indexing (to Solr) given a set of root urls and the -solr
-     parameter respectively. More information and Usage parameters can be found below. */
+  /*
+   * Perform complete crawling and indexing (to Solr) given a set of root urls
+   * and the -solr parameter respectively. More information and Usage parameters
+   * can be found below.
+   */
   public static void main(String args[]) throws Exception {
     Configuration conf = NutchConfiguration.create();
     int res = ToolRunner.run(conf, new Crawl(), args);
     System.exit(res);
   }
-  
+
   @Override
   public int run(String[] args) throws Exception {
     if (args.length < 1) {
-      System.out.println
-      ("Usage: Crawl <urlDir> -solr <solrURL> [-dir d] [-threads n] [-depth i] [-topN N]");
+      System.out
+          .println("Usage: Crawl <urlDir> -solr <solrURL> [-dir d] [-threads n] [-depth i] [-topN N]");
       return -1;
     }
     Path rootUrlDir = null;
@@ -69,20 +71,20 @@
     int depth = 5;
     long topN = Long.MAX_VALUE;
     String solrUrl = null;
-    
+
     for (int i = 0; i < args.length; i++) {
       if ("-dir".equals(args[i])) {
-        dir = new Path(args[i+1]);
+        dir = new Path(args[i + 1]);
         i++;
       } else if ("-threads".equals(args[i])) {
-        threads = Integer.parseInt(args[i+1]);
+        threads = Integer.parseInt(args[i + 1]);
         i++;
       } else if ("-depth".equals(args[i])) {
-        depth = Integer.parseInt(args[i+1]);
+        depth = Integer.parseInt(args[i + 1]);
         i++;
       } else if ("-topN".equals(args[i])) {
-          topN = Integer.parseInt(args[i+1]);
-          i++;
+        topN = Integer.parseInt(args[i + 1]);
+        i++;
       } else if ("-solr".equals(args[i])) {
         solrUrl = StringUtils.lowerCase(args[i + 1]);
         i++;
@@ -90,7 +92,7 @@
         rootUrlDir = new Path(args[i]);
       }
     }
-    
+
     JobConf job = new NutchJob(getConf());
 
     if (solrUrl == null) {
@@ -103,39 +105,39 @@
       LOG.info("crawl started in: " + dir);
       LOG.info("rootUrlDir = " + rootUrlDir);
       LOG.info("threads = " + threads);
-      LOG.info("depth = " + depth);      
+      LOG.info("depth = " + depth);
       LOG.info("solrUrl=" + solrUrl);
       if (topN != Long.MAX_VALUE)
         LOG.info("topN = " + topN);
     }
-    
+
     Path crawlDb = new Path(dir + "/crawldb");
     Path linkDb = new Path(dir + "/linkdb");
     Path segments = new Path(dir + "/segments");
     Path indexes = new Path(dir + "/indexes");
     Path index = new Path(dir + "/index");
 
-    Path tmpDir = job.getLocalPath("crawl"+Path.SEPARATOR+getDate());
+    Path tmpDir = job.getLocalPath("crawl" + Path.SEPARATOR + getDate());
     Injector injector = new Injector(getConf());
     Generator generator = new Generator(getConf());
     Fetcher fetcher = new Fetcher(getConf());
     ParseSegment parseSegment = new ParseSegment(getConf());
     CrawlDb crawlDbTool = new CrawlDb(getConf());
     LinkDb linkDbTool = new LinkDb(getConf());
-      
+
     // initialize crawlDb
     injector.inject(crawlDb, rootUrlDir);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new segment
-      Path[] segs = generator.generate(crawlDb, segments, -1, topN, System
-          .currentTimeMillis());
+    for (i = 0; i < depth; i++) { // generate new segment
+      Path[] segs = generator.generate(crawlDb, segments, -1, topN,
+          System.currentTimeMillis());
       if (segs == null) {
         LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
         break;
       }
-      fetcher.fetch(segs[0], threads);  // fetch it
+      fetcher.fetch(segs[0], threads); // fetch it
       if (!Fetcher.isParsing(job)) {
-        parseSegment.parse(segs[0]);    // parse it, if needed
+        parseSegment.parse(segs[0]); // parse it, if needed
       }
       crawlDbTool.update(crawlDb, segs, true, true); // update crawldb
     }
@@ -144,21 +146,23 @@
 
       if (solrUrl != null) {
         // index, dedup & merge
-        FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] fstats = fs.listStatus(segments,
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         SolrIndexer indexer = new SolrIndexer(getConf());
-        indexer.indexSolr(solrUrl, crawlDb, linkDb, 
-          Arrays.asList(HadoopFSUtil.getPaths(fstats)));
+        indexer.indexSolr(solrUrl, crawlDb, linkDb,
+            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
         SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
         dedup.setConf(getConf());
         dedup.dedup(solrUrl);
       }
-      
+
     } else {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     return 0;
   }
 
-
 }
Index: src/java/org/apache/nutch/crawl/LinkDbFilter.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDbFilter.java	(working copy)
@@ -31,8 +31,8 @@
 import org.apache.nutch.net.URLNormalizers;
 
 /**
- * This class provides a way to separate the URL normalization
- * and filtering steps from the rest of LinkDb manipulation code.
+ * This class provides a way to separate the URL normalization and filtering
+ * steps from the rest of LinkDb manipulation code.
  * 
  * @author Andrzej Bialecki
  */
@@ -50,13 +50,13 @@
   private URLFilters filters;
 
   private URLNormalizers normalizers;
-  
+
   private String scope;
-  
+
   public static final Logger LOG = LoggerFactory.getLogger(LinkDbFilter.class);
 
   private Text newKey = new Text();
-  
+
   public void configure(JobConf job) {
     filter = job.getBoolean(URL_FILTERING, false);
     normalize = job.getBoolean(URL_NORMALIZING, false);
@@ -69,10 +69,12 @@
     }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public void map(Text key, Inlinks value,
-      OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, Inlinks> output, Reporter reporter)
+      throws IOException {
     String url = key.toString();
     Inlinks result = new Inlinks();
     if (normalize) {
@@ -91,7 +93,8 @@
         url = null;
       }
     }
-    if (url == null) return; // didn't pass the filters
+    if (url == null)
+      return; // didn't pass the filters
     Iterator<Inlink> it = value.iterator();
     String fromUrl = null;
     while (it.hasNext()) {
@@ -113,7 +116,7 @@
           fromUrl = null;
         }
       }
-      if (fromUrl != null) { 
+      if (fromUrl != null) {
         result.add(new Inlink(fromUrl, inlink.getAnchor()));
       }
     }
Index: src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(working copy)
@@ -20,8 +20,8 @@
 import org.apache.hadoop.io.Text;
 
 /**
- * This class implements the default re-fetch schedule. That is, no matter
- * if the page was changed or not, the <code>fetchInterval</code> remains
+ * This class implements the default re-fetch schedule. That is, no matter if
+ * the page was changed or not, the <code>fetchInterval</code> remains
  * unchanged, and the updated page fetchTime will always be set to
  * <code>fetchTime + fetchInterval * 1000</code>.
  * 
@@ -31,14 +31,14 @@
 
   @Override
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state) {
     datum = super.setFetchSchedule(url, datum, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
-    if (datum.getFetchInterval() == 0 ) {
+    if (datum.getFetchInterval() == 0) {
       datum.setFetchInterval(defaultInterval);
     }
-    datum.setFetchTime(fetchTime + (long)datum.getFetchInterval() * 1000);
+    datum.setFetchTime(fetchTime + (long) datum.getFetchInterval() * 1000);
     datum.setModifiedTime(modifiedTime);
     return datum;
   }
Index: src/java/org/apache/nutch/crawl/NutchWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/NutchWritable.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/NutchWritable.java	(working copy)
@@ -20,36 +20,34 @@
 import org.apache.nutch.util.GenericWritableConfigurable;
 
 public class NutchWritable extends GenericWritableConfigurable {
-  
+
   private static Class<? extends Writable>[] CLASSES = null;
-  
+
   static {
-    CLASSES = new Class[] {
-      org.apache.hadoop.io.NullWritable.class, 
-      org.apache.hadoop.io.LongWritable.class,
-      org.apache.hadoop.io.BytesWritable.class,
-      org.apache.hadoop.io.FloatWritable.class,
-      org.apache.hadoop.io.IntWritable.class,
-      org.apache.hadoop.io.Text.class,
-      org.apache.hadoop.io.MD5Hash.class,
-      org.apache.nutch.crawl.CrawlDatum.class,
-      org.apache.nutch.crawl.Inlink.class,
-      org.apache.nutch.crawl.Inlinks.class,
-      org.apache.nutch.crawl.MapWritable.class,
-      org.apache.nutch.fetcher.FetcherOutput.class,
-      org.apache.nutch.metadata.Metadata.class,
-      org.apache.nutch.parse.Outlink.class,
-      org.apache.nutch.parse.ParseText.class,
-      org.apache.nutch.parse.ParseData.class,
-      org.apache.nutch.parse.ParseImpl.class,
-      org.apache.nutch.parse.ParseStatus.class,
-      org.apache.nutch.protocol.Content.class,
-      org.apache.nutch.protocol.ProtocolStatus.class,
-    };
+    CLASSES = new Class[] { org.apache.hadoop.io.NullWritable.class,
+        org.apache.hadoop.io.LongWritable.class,
+        org.apache.hadoop.io.BytesWritable.class,
+        org.apache.hadoop.io.FloatWritable.class,
+        org.apache.hadoop.io.IntWritable.class,
+        org.apache.hadoop.io.Text.class, org.apache.hadoop.io.MD5Hash.class,
+        org.apache.nutch.crawl.CrawlDatum.class,
+        org.apache.nutch.crawl.Inlink.class,
+        org.apache.nutch.crawl.Inlinks.class,
+        org.apache.nutch.crawl.MapWritable.class,
+        org.apache.nutch.fetcher.FetcherOutput.class,
+        org.apache.nutch.metadata.Metadata.class,
+        org.apache.nutch.parse.Outlink.class,
+        org.apache.nutch.parse.ParseText.class,
+        org.apache.nutch.parse.ParseData.class,
+        org.apache.nutch.parse.ParseImpl.class,
+        org.apache.nutch.parse.ParseStatus.class,
+        org.apache.nutch.protocol.Content.class,
+        org.apache.nutch.protocol.ProtocolStatus.class, };
   }
 
-  public NutchWritable() { }
-  
+  public NutchWritable() {
+  }
+
   public NutchWritable(Writable instance) {
     set(instance);
   }
Index: src/java/org/apache/nutch/crawl/Injector.java
===================================================================
--- src/java/org/apache/nutch/crawl/Injector.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Injector.java	(working copy)
@@ -38,25 +38,31 @@
 import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.util.TimingUtil;
 
-/** This class takes a flat file of URLs and adds them to the of pages to be
- * crawled.  Useful for bootstrapping the system. 
- * The URL files contain one URL per line, optionally followed by custom metadata 
- * separated by tabs with the metadata key separated from the corresponding value by '='. <br>
+/**
+ * This class takes a flat file of URLs and adds them to the list of pages to be
+ * crawled. Useful for bootstrapping the system. The URL files contain one URL
+ * per line, optionally followed by custom metadata separated by tabs with the
+ * metadata key separated from the corresponding value by '='. <br>
  * Note that some metadata keys are reserved : <br>
  * - <i>nutch.score</i> : allows to set a custom score for a specific URL <br>
- * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a specific URL <br>
- * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000 \t userType=open_source
+ * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a
+ * specific URL <br>
+ * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000
+ * \t userType=open_source
  **/
 public class Injector extends Configured implements Tool {
   public static final Logger LOG = LoggerFactory.getLogger(Injector.class);
-  
+
   /** metadata key reserved for setting a custom score for a specific URL */
   public static String nutchScoreMDName = "nutch.score";
-  /** metadata key reserved for setting a custom fetchInterval for a specific URL */
+  /**
+   * metadata key reserved for setting a custom fetchInterval for a specific URL
+   */
   public static String nutchFetchIntervalMDName = "nutch.fetchInterval";
 
   /** Normalize and filter injected urls. */
-  public static class InjectMapper implements Mapper<WritableComparable, Text, Text, CrawlDatum> {
+  public static class InjectMapper implements
+      Mapper<WritableComparable, Text, Text, CrawlDatum> {
     private URLNormalizers urlNormalizers;
     private int interval;
     private float scoreInjected;
@@ -72,78 +78,86 @@
       filters = new URLFilters(jobConf);
       scfilters = new ScoringFilters(jobConf);
       scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
-      curTime = job.getLong("injector.current.time", System.currentTimeMillis());
+      curTime = job
+          .getLong("injector.current.time", System.currentTimeMillis());
     }
 
-    public void close() {}
+    public void close() {
+    }
 
     public void map(WritableComparable key, Text value,
-                    OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-      throws IOException {
-      String url = value.toString();              // value is line of text
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
+      String url = value.toString(); // value is line of text
 
       if (url != null && url.trim().startsWith("#")) {
-          /* Ignore line that start with # */
-          return;
+        /* Ignore line that start with # */
+        return;
       }
 
       // if tabs : metadata that could be stored
       // must be name=value and separated by \t
       float customScore = -1f;
       int customInterval = interval;
-      Map<String,String> metadata = new TreeMap<String,String>();
-      if (url.indexOf("\t")!=-1){
-    	  String[] splits = url.split("\t");
-    	  url = splits[0];
-    	  for (int s=1;s<splits.length;s++){
-    		  // find separation between name and value
-    		  int indexEquals = splits[s].indexOf("=");
-    		  if (indexEquals==-1) {
-    			  // skip anything without a =
-    			  continue;		    
-    		  }
-    		  String metaname = splits[s].substring(0, indexEquals);
-    		  String metavalue = splits[s].substring(indexEquals+1);
-    		  if (metaname.equals(nutchScoreMDName)) {
-    			  try {
-    			  customScore = Float.parseFloat(metavalue);}
-    			  catch (NumberFormatException nfe){}
-    		  }
-    		  else if (metaname.equals(nutchFetchIntervalMDName)) {
-    			  try {
-    				  customInterval = Integer.parseInt(metavalue);}
-    			  catch (NumberFormatException nfe){}
-    		  }
-    		  else metadata.put(metaname,metavalue);
-    	  }
+      Map<String, String> metadata = new TreeMap<String, String>();
+      if (url.indexOf("\t") != -1) {
+        String[] splits = url.split("\t");
+        url = splits[0];
+        for (int s = 1; s < splits.length; s++) {
+          // find separation between name and value
+          int indexEquals = splits[s].indexOf("=");
+          if (indexEquals == -1) {
+            // skip anything without a =
+            continue;
+          }
+          String metaname = splits[s].substring(0, indexEquals);
+          String metavalue = splits[s].substring(indexEquals + 1);
+          if (metaname.equals(nutchScoreMDName)) {
+            try {
+              customScore = Float.parseFloat(metavalue);
+            } catch (NumberFormatException nfe) {
+            }
+          } else if (metaname.equals(nutchFetchIntervalMDName)) {
+            try {
+              customInterval = Integer.parseInt(metavalue);
+            } catch (NumberFormatException nfe) {
+            }
+          } else
+            metadata.put(metaname, metavalue);
+        }
       }
       try {
         url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
-        url = filters.filter(url);             // filter the url
+        url = filters.filter(url); // filter the url
       } catch (Exception e) {
-        if (LOG.isWarnEnabled()) { LOG.warn("Skipping " +url+":"+e); }
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("Skipping " + url + ":" + e);
+        }
         url = null;
       }
-      if (url != null) {                          // if it passes
-        value.set(url);                           // collect it
-        CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED, customInterval);
+      if (url != null) { // if it passes
+        value.set(url); // collect it
+        CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED,
+            customInterval);
         datum.setFetchTime(curTime);
         // now add the metadata
         Iterator<String> keysIter = metadata.keySet().iterator();
-        while (keysIter.hasNext()){
-        	String keymd = keysIter.next();
-        	String valuemd = metadata.get(keymd);
-        	datum.getMetaData().put(new Text(keymd), new Text(valuemd));
+        while (keysIter.hasNext()) {
+          String keymd = keysIter.next();
+          String valuemd = metadata.get(keymd);
+          datum.getMetaData().put(new Text(keymd), new Text(valuemd));
         }
-        if (customScore != -1) datum.setScore(customScore);
-        else datum.setScore(scoreInjected);
+        if (customScore != -1)
+          datum.setScore(customScore);
+        else
+          datum.setScore(scoreInjected);
         try {
-        	scfilters.injectedScore(value, datum);
+          scfilters.injectedScore(value, datum);
         } catch (ScoringFilterException e) {
-        	if (LOG.isWarnEnabled()) {
-        		LOG.warn("Cannot filter injected score for url " + url
-        				+ ", using default (" + e.getMessage() + ")");
-        	}
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Cannot filter injected score for url " + url
+                + ", using default (" + e.getMessage() + ")");
+          }
         }
         output.collect(value, datum);
       }
@@ -151,16 +165,20 @@
   }
 
   /** Combine multiple new entries for a url. */
-  public static class InjectReducer implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
-    public void configure(JobConf job) {}    
-    public void close() {}
+  public static class InjectReducer implements
+      Reducer<Text, CrawlDatum, Text, CrawlDatum> {
+    public void configure(JobConf job) {
+    }
 
+    public void close() {
+    }
+
     private CrawlDatum old = new CrawlDatum();
     private CrawlDatum injected = new CrawlDatum();
-    
+
     public void reduce(Text key, Iterator<CrawlDatum> values,
-                       OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       boolean oldSet = false;
       while (values.hasNext()) {
         CrawlDatum val = values.next();
@@ -173,19 +191,22 @@
         }
       }
       CrawlDatum res = null;
-      if (oldSet) res = old; // don't overwrite existing value
-      else res = injected;
+      if (oldSet)
+        res = old; // don't overwrite existing value
+      else
+        res = injected;
 
       output.collect(key, res);
     }
   }
 
-  public Injector() {}
-  
+  public Injector() {
+  }
+
   public Injector(Configuration conf) {
     setConf(conf);
   }
-  
+
   public void inject(Path crawlDb, Path urlDir) throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -195,10 +216,9 @@
       LOG.info("Injector: urlDir: " + urlDir);
     }
 
-    Path tempDir =
-      new Path(getConf().get("mapred.temp.dir", ".") +
-               "/inject-temp-"+
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path tempDir = new Path(getConf().get("mapred.temp.dir", ".")
+        + "/inject-temp-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     // map text input file to a <url,CrawlDatum> file
     if (LOG.isInfoEnabled()) {
@@ -231,14 +251,15 @@
     fs.delete(tempDir, true);
 
     long end = System.currentTimeMillis();
-    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Injector(), args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
       System.err.println("Usage: Injector <crawldb> <url_dir>");
Index: src/java/org/apache/nutch/crawl/CrawlDb.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDb.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDb.java	(working copy)
@@ -38,8 +38,8 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This class takes the output of the fetcher and updates the
- * crawldb accordingly.
+ * This class takes the output of the fetcher and updates the crawldb
+ * accordingly.
  */
 public class CrawlDb extends Configured implements Tool {
   public static final Logger LOG = LoggerFactory.getLogger(CrawlDb.class);
@@ -49,21 +49,26 @@
   public static final String CRAWLDB_PURGE_404 = "db.update.purge.404";
 
   public static final String CURRENT_NAME = "current";
-  
+
   public static final String LOCK_NAME = ".locked";
-  
-  public CrawlDb() {}
-  
+
+  public CrawlDb() {
+  }
+
   public CrawlDb(Configuration conf) {
     setConf(conf);
   }
 
-  public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) throws IOException {
-    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
+  public void update(Path crawlDb, Path[] segments, boolean normalize,
+      boolean filter) throws IOException {
+    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED,
+        true);
     update(crawlDb, segments, normalize, filter, additionsAllowed, false);
   }
-  
-  public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) throws IOException {
+
+  public void update(Path crawlDb, Path[] segments, boolean normalize,
+      boolean filter, boolean additionsAllowed, boolean force)
+      throws IOException {
     FileSystem fs = FileSystem.get(getConf());
     Path lock = new Path(crawlDb, LOCK_NAME);
     LockUtil.createLockFile(fs, lock, force);
@@ -106,25 +111,25 @@
     } catch (IOException e) {
       LockUtil.removeLockFile(fs, lock);
       Path outPath = FileOutputFormat.getOutputPath(job);
-      if (fs.exists(outPath) ) fs.delete(outPath, true);
+      if (fs.exists(outPath))
+        fs.delete(outPath, true);
       throw e;
     }
 
     CrawlDb.install(job, crawlDb);
     long end = System.currentTimeMillis();
-    LOG.info("CrawlDb update: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("CrawlDb update: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public static JobConf createJob(Configuration config, Path crawlDb)
-    throws IOException {
-    Path newCrawlDb =
-      new Path(crawlDb,
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+      throws IOException {
+    Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random()
+        .nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("crawldb " + crawlDb);
 
-
     Path current = new Path(crawlDb, CURRENT_NAME);
     if (FileSystem.get(job).exists(current)) {
       FileInputFormat.addInputPath(job, current);
@@ -151,12 +156,14 @@
     Path old = new Path(crawlDb, "old");
     Path current = new Path(crawlDb, CURRENT_NAME);
     if (fs.exists(current)) {
-      if (fs.exists(old)) fs.delete(old, true);
+      if (fs.exists(old))
+        fs.delete(old, true);
       fs.rename(current, old);
     }
     fs.mkdirs(crawlDb);
     fs.rename(newCrawlDb, current);
-    if (fs.exists(old)) fs.delete(old, true);
+    if (fs.exists(old))
+      fs.delete(old, true);
     Path lock = new Path(crawlDb, LOCK_NAME);
     LockUtil.removeLockFile(fs, lock);
   }
@@ -168,14 +175,21 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: CrawlDb <crawldb> (-dir <segments> | <seg1> <seg2> ...) [-force] [-normalize] [-filter] [-noAdditions]");
+      System.err
+          .println("Usage: CrawlDb <crawldb> (-dir <segments> | <seg1> <seg2> ...) [-force] [-normalize] [-filter] [-noAdditions]");
       System.err.println("\tcrawldb\tCrawlDb to update");
-      System.err.println("\t-dir segments\tparent directory containing all segments to update from");
-      System.err.println("\tseg1 seg2 ...\tlist of segment names to update from");
-      System.err.println("\t-force\tforce update even if CrawlDb appears to be locked (CAUTION advised)");
-      System.err.println("\t-normalize\tuse URLNormalizer on urls in CrawlDb and segment (usually not needed)");
-      System.err.println("\t-filter\tuse URLFilters on urls in CrawlDb and segment");
-      System.err.println("\t-noAdditions\tonly update already existing URLs, don't add any newly discovered URLs");
+      System.err
+          .println("\t-dir segments\tparent directory containing all segments to update from");
+      System.err
+          .println("\tseg1 seg2 ...\tlist of segment names to update from");
+      System.err
+          .println("\t-force\tforce update even if CrawlDb appears to be locked (CAUTION advised)");
+      System.err
+          .println("\t-normalize\tuse URLNormalizer on urls in CrawlDb and segment (usually not needed)");
+      System.err
+          .println("\t-filter\tuse URLFilters on urls in CrawlDb and segment");
+      System.err
+          .println("\t-noAdditions\tonly update already existing URLs, don't add any newly discovered URLs");
 
       return -1;
     }
@@ -184,7 +198,8 @@
     boolean force = false;
     boolean url404Purging = false;
     final FileSystem fs = FileSystem.get(getConf());
-    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
+    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED,
+        true);
     HashSet<Path> dirs = new HashSet<Path>();
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-normalize")) {
@@ -196,14 +211,16 @@
       } else if (args[i].equals("-noAdditions")) {
         additionsAllowed = false;
       } else if (args[i].equals("-dir")) {
-        FileStatus[] paths = fs.listStatus(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] paths = fs.listStatus(new Path(args[++i]),
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         dirs.addAll(Arrays.asList(HadoopFSUtil.getPaths(paths)));
       } else {
         dirs.add(new Path(args[i]));
       }
     }
     try {
-      update(new Path(args[0]), dirs.toArray(new Path[dirs.size()]), normalize, filter, additionsAllowed, force);
+      update(new Path(args[0]), dirs.toArray(new Path[dirs.size()]), normalize,
+          filter, additionsAllowed, force);
       return 0;
     } catch (Exception e) {
       LOG.error("CrawlDb update: " + StringUtils.stringifyException(e));
Index: src/java/org/apache/nutch/crawl/CrawlDbMerger.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbMerger.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbMerger.java	(working copy)
@@ -39,36 +39,42 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This tool merges several CrawlDb-s into one, optionally filtering
- * URLs through the current URLFilters, to skip prohibited
- * pages.
+ * This tool merges several CrawlDb-s into one, optionally filtering URLs
+ * through the current URLFilters, to skip prohibited pages.
  * 
- * <p>It's possible to use this tool just for filtering - in that case
- * only one CrawlDb should be specified in arguments.</p>
- * <p>If more than one CrawlDb contains information about the same URL,
- * only the most recent version is retained, as determined by the
- * value of {@link org.apache.nutch.crawl.CrawlDatum#getFetchTime()}.
- * However, all metadata information from all versions is accumulated,
- * with newer values taking precedence over older values.
+ * <p>
+ * It's possible to use this tool just for filtering - in that case only one
+ * CrawlDb should be specified in arguments.
+ * </p>
+ * <p>
+ * If more than one CrawlDb contains information about the same URL, only the
+ * most recent version is retained, as determined by the value of
+ * {@link org.apache.nutch.crawl.CrawlDatum#getFetchTime()}. However, all
+ * metadata information from all versions is accumulated, with newer values
+ * taking precedence over older values.
  * 
  * @author Andrzej Bialecki
  */
 public class CrawlDbMerger extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(CrawlDbMerger.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDbMerger.class);
 
-  public static class Merger extends MapReduceBase implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
+  public static class Merger extends MapReduceBase implements
+      Reducer<Text, CrawlDatum, Text, CrawlDatum> {
     private org.apache.hadoop.io.MapWritable meta;
     private CrawlDatum res = new CrawlDatum();
     private FetchSchedule schedule;
 
-    public void close() throws IOException {}
+    public void close() throws IOException {
+    }
 
     public void configure(JobConf conf) {
       schedule = FetchScheduleFactory.getFetchSchedule(conf);
     }
 
-    public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-            throws IOException {
+    public void reduce(Text key, Iterator<CrawlDatum> values,
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       long resTime = 0L;
       boolean resSet = false;
       meta = new org.apache.hadoop.io.MapWritable();
@@ -91,7 +97,7 @@
             meta.put(e.getKey(), e.getValue());
           }
           res.set(val);
-          resTime = valTime ;
+          resTime = valTime;
         } else {
           // insert older metadata before newer
           for (Entry<Writable, Writable> e : meta.entrySet()) {
@@ -104,35 +110,42 @@
       output.collect(key, res);
     }
   }
-  
+
   public CrawlDbMerger() {
-    
+
   }
-  
+
   public CrawlDbMerger(Configuration conf) {
     setConf(conf);
   }
 
-  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
+  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
+      throws Exception {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("CrawlDb merge: starting at " + sdf.format(start));
 
     JobConf job = createMergeJob(getConf(), output, normalize, filter);
     for (int i = 0; i < dbs.length; i++) {
-      if (LOG.isInfoEnabled()) { LOG.info("Adding " + dbs[i]); }
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Adding " + dbs[i]);
+      }
       FileInputFormat.addInputPath(job, new Path(dbs[i], CrawlDb.CURRENT_NAME));
     }
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
-    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, CrawlDb.CURRENT_NAME));
+    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
+        CrawlDb.CURRENT_NAME));
     long end = System.currentTimeMillis();
-    LOG.info("CrawlDb merge: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("CrawlDb merge: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static JobConf createMergeJob(Configuration conf, Path output, boolean normalize, boolean filter) {
-    Path newCrawlDb = new Path("crawldb-merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  public static JobConf createMergeJob(Configuration conf, Path output,
+      boolean normalize, boolean filter) {
+    Path newCrawlDb = new Path("crawldb-merge-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(conf);
     job.setJobName("crawldb merge " + output);
@@ -156,16 +169,20 @@
    * @param args
    */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDbMerger(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDbMerger(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: CrawlDbMerger <output_crawldb> <crawldb1> [<crawldb2> <crawldb3> ...] [-normalize] [-filter]");
+      System.err
+          .println("Usage: CrawlDbMerger <output_crawldb> <crawldb1> [<crawldb2> <crawldb3> ...] [-normalize] [-filter]");
       System.err.println("\toutput_crawldb\toutput CrawlDb");
-      System.err.println("\tcrawldb1 ...\tinput CrawlDb-s (single input CrawlDb is ok)");
-      System.err.println("\t-normalize\tuse URLNormalizer on urls in the crawldb(s) (usually not needed)");
+      System.err
+          .println("\tcrawldb1 ...\tinput CrawlDb-s (single input CrawlDb is ok)");
+      System.err
+          .println("\t-normalize\tuse URLNormalizer on urls in the crawldb(s) (usually not needed)");
       System.err.println("\t-filter\tuse URLFilters on urls in the crawldb(s)");
       return -1;
     }
@@ -183,8 +200,8 @@
         continue;
       }
       final Path dbPath = new Path(args[i]);
-      if(fs.exists(dbPath))
-       dbs.add(dbPath);
+      if (fs.exists(dbPath))
+        dbs.add(dbPath);
     }
     try {
       merge(output, dbs.toArray(new Path[dbs.size()]), normalize, filter);
Index: src/java/org/apache/nutch/crawl/URLPartitioner.java
===================================================================
--- src/java/org/apache/nutch/crawl/URLPartitioner.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/URLPartitioner.java	(working copy)
@@ -33,8 +33,9 @@
  * Partition urls by host, domain name or IP depending on the value of the
  * parameter 'partition.url.mode' which can be 'byHost', 'byDomain' or 'byIP'
  */
-public class URLPartitioner implements Partitioner<Text,Writable> {
-  private static final Logger LOG = LoggerFactory.getLogger(URLPartitioner.class);
+public class URLPartitioner implements Partitioner<Text, Writable> {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(URLPartitioner.class);
 
   public static final String PARTITION_MODE_KEY = "partition.url.mode";
 
@@ -58,7 +59,8 @@
     normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_PARTITION);
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   /** Hash by domain name. */
   public int getPartition(Text key, Writable value, int numReduceTasks) {
@@ -66,15 +68,16 @@
     URL url = null;
     int hashCode = urlString.hashCode();
     try {
-      urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_PARTITION);
+      urlString = normalizers.normalize(urlString,
+          URLNormalizers.SCOPE_PARTITION);
       url = new URL(urlString);
       hashCode = url.getHost().hashCode();
     } catch (MalformedURLException e) {
       LOG.warn("Malformed URL: '" + urlString + "'");
     }
 
-    if (mode.equals(PARTITION_MODE_DOMAIN) && url != null) hashCode = URLUtil
-        .getDomainName(url).hashCode();
+    if (mode.equals(PARTITION_MODE_DOMAIN) && url != null)
+      hashCode = URLUtil.getDomainName(url).hashCode();
     else if (mode.equals(PARTITION_MODE_IP)) {
       try {
         InetAddress address = InetAddress.getByName(url.getHost());
Index: src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(working copy)
@@ -32,11 +32,12 @@
  * If SYNC_DELTA property is true, then:
  * <ul>
  * <li>calculate a <code>delta = fetchTime - modifiedTime</code></li>
- * <li>try to synchronize with the time of change, by shifting the next fetchTime
- * by a fraction of the difference between the last modification time and the last
- * fetch time. I.e. the next fetch time will be set to
+ * <li>try to synchronize with the time of change, by shifting the next
+ * fetchTime by a fraction of the difference between the last modification time
+ * and the last fetch time. I.e. the next fetch time will be set to
  * <code>fetchTime + fetchInterval - delta * SYNC_DELTA_RATE</code></li>
- * <li>if the adjusted fetch interval is bigger than the delta, then <code>fetchInterval = delta</code>.</li>
+ * <li>if the adjusted fetch interval is bigger than the delta, then
+ * <code>fetchInterval = delta</code>.</li>
  * </ul>
  * </li>
  * <li>the minimum value of fetchInterval may not be smaller than MIN_INTERVAL
@@ -44,10 +45,13 @@
  * <li>the maximum value of fetchInterval may not be bigger than MAX_INTERVAL
  * (default is 365 days).</li>
  * </ul>
- * <p>NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize the algorithm,
- * so that the fetch interval either increases or decreases infinitely, with little
- * relevance to the page changes. Please use {@link #main(String[])} method to
- * test the values before applying them in a production system.</p>
+ * <p>
+ * NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize
+ * the algorithm, so that the fetch interval either increases or decreases
+ * infinitely, with little relevance to the page changes. Please use the
+ * {@link #main(String[])} method to test the values before applying them in a
+ * production system.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
@@ -60,45 +64,50 @@
   private int MAX_INTERVAL;
 
   private int MIN_INTERVAL;
-  
+
   private boolean SYNC_DELTA;
 
   private double SYNC_DELTA_RATE;
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
     DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
     MIN_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.min_interval", 60);
-    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval", SECONDS_PER_DAY * 365 ); // 1 year
+    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval",
+        SECONDS_PER_DAY * 365); // 1 year
     SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true);
-    SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
+    SYNC_DELTA_RATE = conf.getFloat(
+        "db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
   }
 
   @Override
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state) {
     super.setFetchSchedule(url, datum, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     long refTime = fetchTime;
-    if (modifiedTime <= 0) modifiedTime = fetchTime;
+    if (modifiedTime <= 0)
+      modifiedTime = fetchTime;
     float interval = datum.getFetchInterval();
     switch (state) {
-      case FetchSchedule.STATUS_MODIFIED:
-        interval *= (1.0f - DEC_RATE);
-        break;
-      case FetchSchedule.STATUS_NOTMODIFIED:
-        interval *= (1.0f + INC_RATE);
-        break;
-      case FetchSchedule.STATUS_UNKNOWN:
-        break;
+    case FetchSchedule.STATUS_MODIFIED:
+      interval *= (1.0f - DEC_RATE);
+      break;
+    case FetchSchedule.STATUS_NOTMODIFIED:
+      interval *= (1.0f + INC_RATE);
+      break;
+    case FetchSchedule.STATUS_UNKNOWN:
+      break;
     }
     if (SYNC_DELTA) {
       // try to synchronize with the time of change
       long delta = (fetchTime - modifiedTime) / 1000L;
-      if (delta > interval) interval = delta;
+      if (delta > interval)
+        interval = delta;
       refTime = fetchTime - Math.round(delta * SYNC_DELTA_RATE * 1000);
     }
     if (interval < MIN_INTERVAL) {
@@ -134,30 +143,39 @@
     // let's move the timeline a couple of deltas
     for (int i = 0; i < 10000; i++) {
       if (lastModified + update < curTime) {
-        //System.out.println("i=" + i + ", lastModified=" + lastModified + ", update=" + update + ", curTime=" + curTime);
+        // System.out.println("i=" + i + ", lastModified=" + lastModified +
+        // ", update=" + update + ", curTime=" + curTime);
         changed = true;
         changeCnt++;
         lastModified = curTime;
       }
-      System.out.println(i + ". " + changed + "\twill fetch at " + (p.getFetchTime() / delta) + "\tinterval "
-              + (p.getFetchInterval() / SECONDS_PER_DAY ) + " days" + "\t missed " + miss);
+      System.out.println(i + ". " + changed + "\twill fetch at "
+          + (p.getFetchTime() / delta) + "\tinterval "
+          + (p.getFetchInterval() / SECONDS_PER_DAY) + " days" + "\t missed "
+          + miss);
       if (p.getFetchTime() <= curTime) {
         fetchCnt++;
-        fs.setFetchSchedule(new Text("http://www.example.com"), p,
-                p.getFetchTime(), p.getModifiedTime(), curTime, lastModified,
-                changed ? FetchSchedule.STATUS_MODIFIED : FetchSchedule.STATUS_NOTMODIFIED);
-        System.out.println("\tfetched & adjusted: " + "\twill fetch at " + (p.getFetchTime() / delta) + "\tinterval "
-                + (p.getFetchInterval() / SECONDS_PER_DAY ) + " days");
-        if (!changed) miss++;
-        if (miss > maxMiss) maxMiss = miss;
+        fs.setFetchSchedule(new Text("http://www.example.com"), p, p
+            .getFetchTime(), p.getModifiedTime(), curTime, lastModified,
+            changed ? FetchSchedule.STATUS_MODIFIED
+                : FetchSchedule.STATUS_NOTMODIFIED);
+        System.out.println("\tfetched & adjusted: " + "\twill fetch at "
+            + (p.getFetchTime() / delta) + "\tinterval "
+            + (p.getFetchInterval() / SECONDS_PER_DAY) + " days");
+        if (!changed)
+          miss++;
+        if (miss > maxMiss)
+          maxMiss = miss;
         changed = false;
         totalMiss += miss;
         miss = 0;
       }
-      if (changed) miss++;
+      if (changed)
+        miss++;
       curTime += delta;
     }
     System.out.println("Total missed: " + totalMiss + ", max miss: " + maxMiss);
-    System.out.println("Page changed " + changeCnt + " times, fetched " + fetchCnt + " times.");
+    System.out.println("Page changed " + changeCnt + " times, fetched "
+        + fetchCnt + " times.");
   }
 }
Index: src/java/org/apache/nutch/crawl/Inlink.java
===================================================================
--- src/java/org/apache/nutch/crawl/Inlink.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Inlink.java	(working copy)
@@ -26,7 +26,8 @@
   private String fromUrl;
   private String anchor;
 
-  public Inlink() {}
+  public Inlink() {
+  }
 
   public Inlink(String fromUrl, String anchor) {
     this.fromUrl = fromUrl;
@@ -40,8 +41,8 @@
 
   /** Skips over one Inlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip fromUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip fromUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -55,16 +56,20 @@
     return inlink;
   }
 
-  public String getFromUrl() { return fromUrl; }
-  public String getAnchor() { return anchor; }
+  public String getFromUrl() {
+    return fromUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
+
   public boolean equals(Object o) {
     if (!(o instanceof Inlink))
       return false;
-    Inlink other = (Inlink)o;
-    return
-      this.fromUrl.equals(other.fromUrl) &&
-      this.anchor.equals(other.anchor);
+    Inlink other = (Inlink) o;
+    return this.fromUrl.equals(other.fromUrl)
+        && this.anchor.equals(other.anchor);
   }
 
   public int hashCode() {
Index: src/java/org/apache/nutch/crawl/CrawlDbFilter.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbFilter.java	(working copy)
@@ -30,12 +30,13 @@
 import org.apache.nutch.net.URLNormalizers;
 
 /**
- * This class provides a way to separate the URL normalization
- * and filtering steps from the rest of CrawlDb manipulation code.
+ * This class provides a way to separate the URL normalization and filtering
+ * steps from the rest of CrawlDb manipulation code.
  * 
  * @author Andrzej Bialecki
  */
-public class CrawlDbFilter implements Mapper<Text, CrawlDatum, Text, CrawlDatum> {
+public class CrawlDbFilter implements
+    Mapper<Text, CrawlDatum, Text, CrawlDatum> {
   public static final String URL_FILTERING = "crawldb.url.filters";
 
   public static final String URL_NORMALIZING = "crawldb.url.normalizers";
@@ -51,7 +52,7 @@
   private URLFilters filters;
 
   private URLNormalizers normalizers;
-  
+
   private String scope;
 
   public static final Logger LOG = LoggerFactory.getLogger(CrawlDbFilter.class);
@@ -70,17 +71,19 @@
     }
   }
 
-  public void close() {}
-  
+  public void close() {
+  }
+
   private Text newKey = new Text();
 
   public void map(Text key, CrawlDatum value,
-      OutputCollector<Text, CrawlDatum> output,
-      Reporter reporter) throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     String url = key.toString();
 
-    // https://issues.apache.org/jira/browse/NUTCH-1101 check status first, cheaper than normalizing or filtering
+    // https://issues.apache.org/jira/browse/NUTCH-1101 check status first,
+    // cheaper than normalizing or filtering
     if (url404Purging && CrawlDatum.STATUS_DB_GONE == value.getStatus()) {
       url = null;
     }
Index: src/java/org/apache/nutch/crawl/Generator.java
===================================================================
--- src/java/org/apache/nutch/crawl/Generator.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Generator.java	(working copy)
@@ -51,9 +51,9 @@
  * Generates a subset of a crawl db to fetch. This version allows to generate
  * fetchlists for several segments in one go. Unlike in the initial version
  * (OldGenerator), the IP resolution is done ONLY on the entries which have been
- * selected for fetching. The URLs are partitioned by IP, domain or host within a 
- * segment. We can chose separately how to count the URLS i.e. by domain or host
- * to limit the entries.
+ * selected for fetching. The URLs are partitioned by IP, domain or host within
+ * a segment. We can choose separately how to count the URLs, i.e. by domain or
+ * host, to limit the entries.
  **/
 public class Generator extends Configured implements Tool {
 
@@ -71,8 +71,8 @@
   public static final String GENERATOR_CUR_TIME = "generate.curTime";
   public static final String GENERATOR_DELAY = "crawl.gen.delay";
   public static final String GENERATOR_MAX_NUM_SEGMENTS = "generate.max.num.segments";
-  
-  // deprecated parameters 
+
+  // deprecated parameters
   public static final String GENERATE_MAX_PER_HOST_BY_IP = "generate.max.per.host.by.ip";
   public static final String GENERATE_MAX_PER_HOST = "generate.max.per.host";
 
@@ -100,25 +100,25 @@
     }
 
     public String toString() {
-      return "url=" + url.toString() + ", datum=" + datum.toString() + ", segnum="
-          + segnum.toString();
+      return "url=" + url.toString() + ", datum=" + datum.toString()
+          + ", segnum=" + segnum.toString();
     }
   }
 
   /** Selects entries due for fetch. */
   public static class Selector implements
-      Mapper<Text,CrawlDatum,FloatWritable,SelectorEntry>,
-      Partitioner<FloatWritable,Writable>,
-      Reducer<FloatWritable,SelectorEntry,FloatWritable,SelectorEntry> {
+      Mapper<Text, CrawlDatum, FloatWritable, SelectorEntry>,
+      Partitioner<FloatWritable, Writable>,
+      Reducer<FloatWritable, SelectorEntry, FloatWritable, SelectorEntry> {
     private LongWritable genTime = new LongWritable(System.currentTimeMillis());
     private long curTime;
     private long limit;
     private long count;
-    private HashMap<String,int[]> hostCounts = new HashMap<String,int[]>();
+    private HashMap<String, int[]> hostCounts = new HashMap<String, int[]>();
     private int segCounts[];
     private int maxCount;
     private boolean byDomain = false;
-    private Partitioner<Text,Writable> partitioner = new URLPartitioner();
+    private Partitioner<Text, Writable> partitioner = new URLPartitioner();
     private URLFilters filters;
     private URLNormalizers normalizers;
     private ScoringFilters scfilters;
@@ -134,46 +134,53 @@
 
     public void configure(JobConf job) {
       curTime = job.getLong(GENERATOR_CUR_TIME, System.currentTimeMillis());
-      limit = job.getLong(GENERATOR_TOP_N, Long.MAX_VALUE) / job.getNumReduceTasks();
+      limit = job.getLong(GENERATOR_TOP_N, Long.MAX_VALUE)
+          / job.getNumReduceTasks();
       maxCount = job.getInt(GENERATOR_MAX_COUNT, -1);
       // back compatibility with old param
       int oldMaxPerHost = job.getInt(GENERATE_MAX_PER_HOST, -1);
-      if (maxCount==-1 && oldMaxPerHost!=-1){
+      if (maxCount == -1 && oldMaxPerHost != -1) {
         maxCount = oldMaxPerHost;
         byDomain = false;
       }
-      if (GENERATOR_COUNT_VALUE_DOMAIN.equals(job.get(GENERATOR_COUNT_MODE))) byDomain = true;
+      if (GENERATOR_COUNT_VALUE_DOMAIN.equals(job.get(GENERATOR_COUNT_MODE)))
+        byDomain = true;
       filters = new URLFilters(job);
       normalise = job.getBoolean(GENERATOR_NORMALISE, true);
-      if (normalise) normalizers = new URLNormalizers(job,
-          URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+      if (normalise)
+        normalizers = new URLNormalizers(job,
+            URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
       scfilters = new ScoringFilters(job);
       partitioner.configure(job);
       filter = job.getBoolean(GENERATOR_FILTER, true);
       genDelay = job.getLong(GENERATOR_DELAY, 7L) * 3600L * 24L * 1000L;
       long time = job.getLong(Nutch.GENERATE_TIME_KEY, 0L);
-      if (time > 0) genTime.set(time);
+      if (time > 0)
+        genTime.set(time);
       schedule = FetchScheduleFactory.getFetchSchedule(job);
       scoreThreshold = job.getFloat(GENERATOR_MIN_SCORE, Float.NaN);
       maxNumSegments = job.getInt(GENERATOR_MAX_NUM_SEGMENTS, 1);
       segCounts = new int[maxNumSegments];
     }
 
-    public void close() {}
+    public void close() {
+    }
 
     /** Select & invert subset due for fetch. */
     public void map(Text key, CrawlDatum value,
-        OutputCollector<FloatWritable,SelectorEntry> output, Reporter reporter)
+        OutputCollector<FloatWritable, SelectorEntry> output, Reporter reporter)
         throws IOException {
       Text url = key;
       if (filter) {
         // If filtering is on don't generate URLs that don't pass
         // URLFilters
         try {
-          if (filters.filter(url.toString()) == null) return;
+          if (filters.filter(url.toString()) == null)
+            return;
         } catch (URLFilterException e) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage() + ")");
+            LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage()
+                + ")");
           }
         }
       }
@@ -190,8 +197,8 @@
           Nutch.WRITABLE_GENERATE_TIME_KEY);
       if (oldGenTime != null) { // awaiting fetch & update
         if (oldGenTime.get() + genDelay > curTime) // still wait for
-        // update
-        return;
+          // update
+          return;
       }
       float sort = 1.0f;
       try {
@@ -203,7 +210,8 @@
       }
 
       // consider only entries with a score superior to the threshold
-      if (scoreThreshold != Float.NaN && sort < scoreThreshold) return;
+      if (scoreThreshold != Float.NaN && sort < scoreThreshold)
+        return;
 
       // sort by decreasing score, using DecreasingFloatComparator
       sortValue.set(sort);
@@ -215,13 +223,15 @@
     }
 
     /** Partition by host / domain or IP. */
-    public int getPartition(FloatWritable key, Writable value, int numReduceTasks) {
-      return partitioner.getPartition(((SelectorEntry) value).url, key, numReduceTasks);
+    public int getPartition(FloatWritable key, Writable value,
+        int numReduceTasks) {
+      return partitioner.getPartition(((SelectorEntry) value).url, key,
+          numReduceTasks);
     }
 
     /** Collect until limit is reached. */
     public void reduce(FloatWritable key, Iterator<SelectorEntry> values,
-        OutputCollector<FloatWritable,SelectorEntry> output, Reporter reporter)
+        OutputCollector<FloatWritable, SelectorEntry> output, Reporter reporter)
         throws IOException {
 
       while (values.hasNext()) {
@@ -231,7 +241,8 @@
           if (currentsegmentnum < maxNumSegments) {
             count = 0;
             currentsegmentnum++;
-          } else break;
+          } else
+            break;
         }
 
         SelectorEntry entry = values.next();
@@ -264,7 +275,7 @@
         if (maxCount > 0) {
           int[] hostCount = hostCounts.get(hostordomain);
           if (hostCount == null) {
-            hostCount = new int[] {1, 0};
+            hostCount = new int[] { 1, 0 };
             hostCounts.put(hostordomain, hostCount);
           }
 
@@ -272,7 +283,8 @@
           hostCount[1]++;
 
           // check if topN reached, select next segment if it is
-          while (segCounts[hostCount[0]-1] >= limit && hostCount[0] < maxNumSegments) {
+          while (segCounts[hostCount[0] - 1] >= limit
+              && hostCount[0] < maxNumSegments) {
             hostCount[0]++;
             hostCount[1] = 0;
           }
@@ -285,18 +297,23 @@
               hostCount[1] = 0;
             } else {
               if (hostCount[1] == maxCount + 1 && LOG.isInfoEnabled()) {
-                LOG.info("Host or domain " + hostordomain + " has more than " + maxCount
-                    + " URLs for all " + maxNumSegments + " segments. Additional URLs won't be included in the fetchlist.");
+                LOG.info("Host or domain "
+                    + hostordomain
+                    + " has more than "
+                    + maxCount
+                    + " URLs for all "
+                    + maxNumSegments
+                    + " segments. Additional URLs won't be included in the fetchlist.");
               }
               // skip this entry
               continue;
             }
           }
           entry.segnum = new IntWritable(hostCount[0]);
-          segCounts[hostCount[0]-1]++;
+          segCounts[hostCount[0] - 1]++;
         } else {
           entry.segnum = new IntWritable(currentsegmentnum);
-          segCounts[currentsegmentnum-1]++;
+          segCounts[currentsegmentnum - 1]++;
         }
 
         output.collect(key, entry);
@@ -310,16 +327,17 @@
 
   // Allows the reducers to generate one subfile per
   public static class GeneratorOutputFormat extends
-      MultipleSequenceFileOutputFormat<FloatWritable,SelectorEntry> {
+      MultipleSequenceFileOutputFormat<FloatWritable, SelectorEntry> {
     // generate a filename based on the segnum stored for this entry
-    protected String generateFileNameForKeyValue(FloatWritable key, SelectorEntry value,
-        String name) {
+    protected String generateFileNameForKeyValue(FloatWritable key,
+        SelectorEntry value, String name) {
       return "fetchlist-" + value.segnum.toString() + "/" + name;
     }
 
   }
 
-  public static class DecreasingFloatComparator extends FloatWritable.Comparator {
+  public static class DecreasingFloatComparator extends
+      FloatWritable.Comparator {
 
     /** Compares two FloatWritables decreasing. */
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
@@ -328,20 +346,22 @@
   }
 
   public static class SelectorInverseMapper extends MapReduceBase implements
-      Mapper<FloatWritable,SelectorEntry,Text,SelectorEntry> {
+      Mapper<FloatWritable, SelectorEntry, Text, SelectorEntry> {
 
     public void map(FloatWritable key, SelectorEntry value,
-        OutputCollector<Text,SelectorEntry> output, Reporter reporter) throws IOException {
+        OutputCollector<Text, SelectorEntry> output, Reporter reporter)
+        throws IOException {
       SelectorEntry entry = (SelectorEntry) value;
       output.collect(entry.url, entry);
     }
   }
 
   public static class PartitionReducer extends MapReduceBase implements
-      Reducer<Text,SelectorEntry,Text,CrawlDatum> {
+      Reducer<Text, SelectorEntry, Text, CrawlDatum> {
 
     public void reduce(Text key, Iterator<SelectorEntry> values,
-        OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       // if using HashComparator, we get only one input key in case of
       // hash collision
       // so use only URLs from values
@@ -388,15 +408,17 @@
    * Update the CrawlDB so that the next generate won't include the same URLs.
    */
   public static class CrawlDbUpdater extends MapReduceBase implements
-      Mapper<Text,CrawlDatum,Text,CrawlDatum>, Reducer<Text,CrawlDatum,Text,CrawlDatum> {
+      Mapper<Text, CrawlDatum, Text, CrawlDatum>,
+      Reducer<Text, CrawlDatum, Text, CrawlDatum> {
     long generateTime;
 
     public void configure(JobConf job) {
       generateTime = job.getLong(Nutch.GENERATE_TIME_KEY, 0L);
     }
 
-    public void map(Text key, CrawlDatum value, OutputCollector<Text,CrawlDatum> output,
-        Reporter reporter) throws IOException {
+    public void map(Text key, CrawlDatum value,
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       output.collect(key, value);
     }
 
@@ -404,7 +426,8 @@
     private LongWritable genTime = new LongWritable(0L);
 
     public void reduce(Text key, Iterator<CrawlDatum> values,
-        OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       genTime.set(0L);
       while (values.hasNext()) {
         CrawlDatum val = values.next();
@@ -428,19 +451,21 @@
     }
   }
 
-  public Generator() {}
+  public Generator() {
+  }
 
   public Generator(Configuration conf) {
     setConf(conf);
   }
 
-  public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime)
-      throws IOException {
+  public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
+      long curTime) throws IOException {
 
     JobConf job = new NutchJob(getConf());
     boolean filter = job.getBoolean(GENERATOR_FILTER, true);
     boolean normalise = job.getBoolean(GENERATOR_NORMALISE, true);
-    return generate(dbDir, segments, numLists, topN, curTime, filter, normalise, false, 1);
+    return generate(dbDir, segments, numLists, topN, curTime, filter,
+        normalise, false, 1);
   }
 
   /**
@@ -449,7 +474,8 @@
    **/
   public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
       long curTime, boolean filter, boolean force) throws IOException {
-    return generate(dbDir, segments, numLists, topN, curTime, filter, true, force, 1);
+    return generate(dbDir, segments, numLists, topN, curTime, filter, true,
+        force, 1);
   }
 
   /**
@@ -475,11 +501,11 @@
    *           When an I/O error occurs
    */
   public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
-      long curTime, boolean filter, boolean norm, boolean force, int maxNumSegments)
-      throws IOException {
+      long curTime, boolean filter, boolean norm, boolean force,
+      int maxNumSegments) throws IOException {
 
-    Path tempDir = new Path(getConf().get("mapred.temp.dir", ".") + "/generate-temp-"
-        + System.currentTimeMillis());
+    Path tempDir = new Path(getConf().get("mapred.temp.dir", ".")
+        + "/generate-temp-" + System.currentTimeMillis());
 
     Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
     FileSystem fs = FileSystem.get(getConf());
@@ -494,8 +520,8 @@
     if (topN != Long.MAX_VALUE) {
       LOG.info("Generator: topN: " + topN);
     }
-    
-    if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))){
+
+    if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))) {
       LOG.info("Generator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead");
     }
 
@@ -548,7 +574,8 @@
     try {
       for (FileStatus stat : status) {
         Path subfetchlist = stat.getPath();
-        if (!subfetchlist.getName().startsWith("fetchlist-")) continue;
+        if (!subfetchlist.getName().startsWith("fetchlist-"))
+          continue;
         // start a new partition job for this segment
         Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);
         generatedSegments.add(newSeg);
@@ -568,8 +595,8 @@
 
     if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
       // update the db from tempDir
-      Path tempDir2 = new Path(getConf().get("mapred.temp.dir", ".") + "/generate-temp-"
-          + System.currentTimeMillis());
+      Path tempDir2 = new Path(getConf().get("mapred.temp.dir", ".")
+          + "/generate-temp-" + System.currentTimeMillis());
 
       job = new NutchJob(getConf());
       job.setJobName("generate: updatedb " + dbDir);
@@ -602,7 +629,8 @@
     fs.delete(tempDir, true);
 
     long end = System.currentTimeMillis();
-    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
 
     Path[] patharray = new Path[generatedSegments.size()];
     return generatedSegments.toArray(patharray);
@@ -648,7 +676,8 @@
   public static synchronized String generateSegmentName() {
     try {
       Thread.sleep(1000);
-    } catch (Throwable t) {}
+    } catch (Throwable t) {
+    }
     ;
     return sdf.format(new Date(System.currentTimeMillis()));
   }
@@ -657,7 +686,8 @@
    * Generate a fetchlist from the crawldb.
    */
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new Generator(), args);
+    int res = ToolRunner
+        .run(NutchConfiguration.create(), new Generator(), args);
     System.exit(res);
   }
 
@@ -701,9 +731,10 @@
     }
 
     try {
-      Path[] segs = generate(dbDir, segmentsDir, numFetchers, topN, curTime, filter,
-          norm, force, maxNumSegments);
-      if (segs == null) return -1;
+      Path[] segs = generate(dbDir, segmentsDir, numFetchers, topN, curTime,
+          filter, norm, force, maxNumSegments);
+      if (segs == null)
+        return -1;
     } catch (Exception e) {
       LOG.error("Generator: " + StringUtils.stringifyException(e));
       return -1;
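
The Selector.reduce() hunks above implement the per-host (or per-domain) quota
bookkeeping mentioned in the class Javadoc: each host keeps a two-element
counter {current segment, URLs already assigned to it in that segment}; when
the quota (maxCount) is exceeded the URL overflows into the next segment, and
once all maxNumSegments segments are exhausted the entry is skipped. The
following simplified, self-contained sketch (illustrative names, not Nutch
code; the per-segment topN check of the real reducer is omitted) shows that
idea in isolation:

    import java.util.HashMap;
    import java.util.Map;

    /** Sketch only: assigns a host's URLs to segments, at most maxCount per segment. */
    public class HostSegmentAssigner {
      private final Map<String, int[]> hostCounts = new HashMap<String, int[]>();
      private final int maxCount;
      private final int maxNumSegments;

      public HostSegmentAssigner(int maxCount, int maxNumSegments) {
        this.maxCount = maxCount;
        this.maxNumSegments = maxNumSegments;
      }

      /** @return 1-based segment number, or -1 if the host is over quota everywhere. */
      public int assign(String host) {
        int[] hostCount = hostCounts.get(host);
        if (hostCount == null) {
          hostCount = new int[] { 1, 0 }; // {segment, count in that segment}
          hostCounts.put(host, hostCount);
        }
        hostCount[1]++;
        if (hostCount[1] > maxCount) {    // quota reached in the current segment
          if (hostCount[0] < maxNumSegments) {
            hostCount[0]++;               // spill over into the next segment
            hostCount[1] = 1;
          } else {
            return -1;                    // all segments full for this host: skip
          }
        }
        return hostCount[0];
      }

      public static void main(String[] args) {
        HostSegmentAssigner a = new HostSegmentAssigner(2, 2);
        for (int i = 0; i < 6; i++) {
          System.out.println("example.com -> segment " + a.assign("example.com"));
        }
      }
    }

With maxCount = 2 and maxNumSegments = 2 the example prints segments 1, 1, 2, 2
and then -1 for the remaining URLs, which is the situation the log message
"Additional URLs won't be included in the fetchlist" refers to.
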
Index: src/java/org/apache/nutch/crawl/LinkDbReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDbReader.java	(working copy)
@@ -49,14 +49,14 @@
   private MapFile.Reader[] readers;
 
   public LinkDbReader() {
-    
+
   }
-  
+
   public LinkDbReader(Configuration conf, Path directory) throws Exception {
     setConf(conf);
     init(directory);
   }
-  
+
   public void init(Path directory) throws Exception {
     this.fs = FileSystem.get(getConf());
     this.directory = directory;
@@ -72,16 +72,16 @@
   public Inlinks getInlinks(Text url) throws IOException {
 
     if (readers == null) {
-      synchronized(this) {
-        readers = MapFileOutputFormat.getReaders
-          (fs, new Path(directory, LinkDb.CURRENT_NAME), getConf());
+      synchronized (this) {
+        readers = MapFileOutputFormat.getReaders(fs, new Path(directory,
+            LinkDb.CURRENT_NAME), getConf());
       }
     }
-    
-    return (Inlinks)MapFileOutputFormat.getEntry
-      (readers, PARTITIONER, url, new Inlinks());
+
+    return (Inlinks) MapFileOutputFormat.getEntry(readers, PARTITIONER, url,
+        new Inlinks());
   }
-  
+
   public void close() throws IOException {
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -89,7 +89,7 @@
       }
     }
   }
-  
+
   public void processDumpJob(String linkdb, String output) throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -113,19 +113,24 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("LinkDb dump: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDb dump: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
-  
+
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbReader(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbReader(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: LinkDbReader <linkdb> (-dump <out_dir> | -url <url>)");
-      System.err.println("\t-dump <out_dir>\tdump whole link db to a text file in <out_dir>");
-      System.err.println("\t-url <url>\tprint information about <url> to System.out");
+      System.err
+          .println("Usage: LinkDbReader <linkdb> (-dump <out_dir> | -url <url>)");
+      System.err
+          .println("\t-dump <out_dir>\tdump whole link db to a text file in <out_dir>");
+      System.err
+          .println("\t-url <url>\tprint information about <url> to System.out");
       return -1;
     }
     try {
Index: src/java/org/apache/nutch/crawl/TextProfileSignature.java
===================================================================
--- src/java/org/apache/nutch/crawl/TextProfileSignature.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/TextProfileSignature.java	(working copy)
@@ -35,41 +35,50 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * <p>An implementation of a page signature. It calculates an MD5 hash
- * of a plain text "profile" of a page. In case there is no text, it
- * calculates a hash using the {@link MD5Signature}.</p>
- * <p>The algorithm to calculate a page "profile" takes the plain text version of
- * a page and performs the following steps:
+ * <p>
+ * An implementation of a page signature. It calculates an MD5 hash of a plain
+ * text "profile" of a page. In case there is no text, it calculates a hash
+ * using the {@link MD5Signature}.
+ * </p>
+ * <p>
+ * The algorithm to calculate a page "profile" takes the plain text version of a
+ * page and performs the following steps:
  * <ul>
  * <li>remove all characters except letters and digits, and bring all characters
  * to lower case,</li>
  * <li>split the text into tokens (all consecutive non-whitespace characters),</li>
- * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2 characters),</li>
+ * <li>discard tokens equal to or shorter than MIN_TOKEN_LEN (default 2
+ * characters),</li>
  * <li>sort the list of tokens by decreasing frequency,</li>
- * <li>round down the counts of tokens to the nearest multiple of QUANT
- * (<code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is 0.01f
- * by default, and <code>maxFreq</code> is the maximum token frequency). If
- * <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2 (which
- * means that tokens with frequency 1 are always discarded).</li>
- * <li>tokens, which frequency after quantization falls below QUANT, are discarded.</li>
- * <li>create a list of tokens and their quantized frequency, separated by spaces,
- * in the order of decreasing frequency.</li>
+ * <li>round down the counts of tokens to the nearest multiple of QUANT (
+ * <code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is
+ * 0.01f by default, and <code>maxFreq</code> is the maximum token frequency).
+ * If <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2
+ * (which means that tokens with frequency 1 are always discarded).</li>
+ * <li>tokens whose frequency after quantization falls below QUANT are
+ * discarded.</li>
+ * <li>create a list of tokens and their quantized frequency, separated by
+ * spaces, in the order of decreasing frequency.</li>
  * </ul>
  * This list is then submitted to an MD5 hash calculation.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class TextProfileSignature extends Signature {
-  
+
   Signature fallback = new MD5Signature();
 
   public byte[] calculate(Content content, Parse parse) {
-    int MIN_TOKEN_LEN = getConf().getInt("db.signature.text_profile.min_token_len", 2);
-    float QUANT_RATE = getConf().getFloat("db.signature.text_profile.quant_rate", 0.01f);
+    int MIN_TOKEN_LEN = getConf().getInt(
+        "db.signature.text_profile.min_token_len", 2);
+    float QUANT_RATE = getConf().getFloat(
+        "db.signature.text_profile.quant_rate", 0.01f);
     HashMap<String, Token> tokens = new HashMap<String, Token>();
     String text = null;
-    if (parse != null) text = parse.getText();
-    if (text == null || text.length() == 0) return fallback.calculate(content, parse);
+    if (parse != null)
+      text = parse.getText();
+    if (text == null || text.length() == 0)
+      return fallback.calculate(content, parse);
     StringBuffer curToken = new StringBuffer();
     int maxFreq = 0;
     for (int i = 0; i < text.length(); i++) {
@@ -87,7 +96,8 @@
               tokens.put(s, tok);
             }
             tok.cnt++;
-            if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+            if (tok.cnt > maxFreq)
+              maxFreq = tok.cnt;
           }
           curToken.setLength(0);
         }
@@ -103,17 +113,20 @@
         tokens.put(s, tok);
       }
       tok.cnt++;
-      if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+      if (tok.cnt > maxFreq)
+        maxFreq = tok.cnt;
     }
     Iterator<Token> it = tokens.values().iterator();
     ArrayList<Token> profile = new ArrayList<Token>();
     // calculate the QUANT value
     int QUANT = Math.round(maxFreq * QUANT_RATE);
     if (QUANT < 2) {
-      if (maxFreq > 1) QUANT = 2;
-      else QUANT = 1;
+      if (maxFreq > 1)
+        QUANT = 2;
+      else
+        QUANT = 1;
     }
-    while(it.hasNext()) {
+    while (it.hasNext()) {
       Token t = it.next();
       // round down to the nearest QUANT
       t.cnt = (t.cnt / QUANT) * QUANT;
@@ -128,32 +141,33 @@
     it = profile.iterator();
     while (it.hasNext()) {
       Token t = it.next();
-      if (newText.length() > 0) newText.append("\n");
+      if (newText.length() > 0)
+        newText.append("\n");
       newText.append(t.toString());
     }
     return MD5Hash.digest(newText.toString()).getDigest();
   }
-  
+
   private static class Token {
     public int cnt;
     public String val;
-    
+
     public Token(int cnt, String val) {
       this.cnt = cnt;
       this.val = val;
     }
-    
+
     public String toString() {
       return val + " " + cnt;
     }
   }
-  
+
   private static class TokenComparator implements Comparator<Token> {
     public int compare(Token t1, Token t2) {
       return t2.cnt - t1.cnt;
     }
   }
-  
+
   public static void main(String[] args) throws Exception {
     TextProfileSignature sig = new TextProfileSignature();
     sig.setConf(NutchConfiguration.create());
@@ -161,15 +175,18 @@
     File[] files = new File(args[0]).listFiles();
     for (int i = 0; i < files.length; i++) {
       FileInputStream fis = new FileInputStream(files[i]);
-      BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
+      BufferedReader br = new BufferedReader(
+          new InputStreamReader(fis, "UTF-8"));
       StringBuffer text = new StringBuffer();
       String line = null;
       while ((line = br.readLine()) != null) {
-        if (text.length() > 0) text.append("\n");
+        if (text.length() > 0)
+          text.append("\n");
         text.append(line);
       }
       br.close();
-      byte[] signature = sig.calculate(null, new ParseImpl(text.toString(), null));
+      byte[] signature = sig.calculate(null, new ParseImpl(text.toString(),
+          null));
       res.put(files[i].toString(), signature);
     }
     Iterator<String> it = res.keySet().iterator();
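
The Javadoc above spells out the profile algorithm step by step. The following
self-contained sketch (illustrative names, not the Nutch class) reproduces
those steps up to the point where the profile string is built; Nutch then
hashes that string with MD5Hash.digest():

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    /** Sketch of the text-profile construction; the MD5 hashing step is omitted. */
    public class TextProfileSketch {

      public static String profile(String text, int minTokenLen, float quantRate) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        int maxFreq = 0;
        // keep only letters and digits, lower-cased, and split into tokens
        String cleaned = text.toLowerCase().replaceAll("[^\\p{L}\\p{N}]+", " ");
        for (String token : cleaned.split("\\s+")) {
          if (token.length() <= minTokenLen)
            continue; // discard short tokens
          Integer c = counts.get(token);
          int n = (c == null) ? 1 : c + 1;
          counts.put(token, n);
          if (n > maxFreq)
            maxFreq = n;
        }
        // QUANT = QUANT_RATE * maxFreq, but at least 2 when maxFreq > 1
        int quant = Math.round(maxFreq * quantRate);
        if (quant < 2)
          quant = (maxFreq > 1) ? 2 : 1;
        List<Map.Entry<String, Integer>> profile =
            new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> e : counts.entrySet()) {
          int quantized = (e.getValue() / quant) * quant; // round down to QUANT
          if (quantized < quant)
            continue; // drop tokens that fall below QUANT
          e.setValue(quantized);
          profile.add(e);
        }
        Collections.sort(profile, new Comparator<Map.Entry<String, Integer>>() {
          public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
            return b.getValue() - a.getValue(); // decreasing frequency
          }
        });
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, Integer> e : profile) {
          if (sb.length() > 0)
            sb.append('\n');
          sb.append(e.getKey()).append(' ').append(e.getValue());
        }
        return sb.toString(); // this string is what the MD5 hash is taken over
      }

      public static void main(String[] args) {
        System.out.println(profile(
            "the quick brown fox jumps over the lazy dog the fox", 2, 0.01f));
      }
    }
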
Index: src/java/org/apache/nutch/crawl/CrawlDbReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbReducer.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbReducer.java	(working copy)
@@ -35,9 +35,11 @@
 import org.apache.nutch.scoring.ScoringFilters;
 
 /** Merge new page entries with existing entries. */
-public class CrawlDbReducer implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
-  public static final Logger LOG = LoggerFactory.getLogger(CrawlDbReducer.class);
-  
+public class CrawlDbReducer implements
+    Reducer<Text, CrawlDatum, Text, CrawlDatum> {
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDbReducer.class);
+
   private int retryMax;
   private CrawlDatum result = new CrawlDatum();
   private InlinkPriorityQueue linked = null;
@@ -51,18 +53,20 @@
     scfilters = new ScoringFilters(job);
     additionsAllowed = job.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true);
     int oldMaxInterval = job.getInt("db.max.fetch.interval", 0);
-    maxInterval = job.getInt("db.fetch.interval.max", 0 );
-    if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
+    maxInterval = job.getInt("db.fetch.interval.max", 0);
+    if (oldMaxInterval > 0 && maxInterval == 0)
+      maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
     schedule = FetchScheduleFactory.getFetchSchedule(job);
     int maxLinks = job.getInt("db.update.max.inlinks", 10000);
     linked = new InlinkPriorityQueue(maxLinks);
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public void reduce(Text key, Iterator<CrawlDatum> values,
-                     OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     CrawlDatum fetch = new CrawlDatum();
     CrawlDatum old = new CrawlDatum();
@@ -73,10 +77,11 @@
     boolean multiple = false; // avoid deep copy when only single value exists
     linked.clear();
     org.apache.hadoop.io.MapWritable metaFromParse = null;
-    
+
     while (values.hasNext()) {
-      CrawlDatum datum = (CrawlDatum)values.next();
-      if (!multiple && values.hasNext()) multiple = true;
+      CrawlDatum datum = (CrawlDatum) values.next();
+      if (!multiple && values.hasNext())
+        multiple = true;
       if (CrawlDatum.hasDbStatus(datum)) {
         if (!oldSet) {
           if (multiple) {
@@ -88,7 +93,8 @@
           oldSet = true;
         } else {
           // always take the latest version
-          if (old.getFetchTime() < datum.getFetchTime()) old.set(datum);
+          if (old.getFetchTime() < datum.getFetchTime())
+            old.set(datum);
         }
         continue;
       }
@@ -103,12 +109,13 @@
           fetchSet = true;
         } else {
           // always take the latest version
-          if (fetch.getFetchTime() < datum.getFetchTime()) fetch.set(datum);
+          if (fetch.getFetchTime() < datum.getFetchTime())
+            fetch.set(datum);
         }
         continue;
       }
 
-      switch (datum.getStatus()) {                // collect other info
+      switch (datum.getStatus()) { // collect other info
       case CrawlDatum.STATUS_LINKED:
         CrawlDatum link;
         if (multiple) {
@@ -129,7 +136,7 @@
         LOG.warn("Unknown status, key: " + key + ", datum: " + datum);
       }
     }
-    
+
     // copy the content of the queue into a List
     // in reversed order
     int numLinks = linked.size();
@@ -137,28 +144,31 @@
     for (int i = numLinks - 1; i >= 0; i--) {
       linkList.add(linked.pop());
     }
-    
+
     // if it doesn't already exist, skip it
-    if (!oldSet && !additionsAllowed) return;
-    
+    if (!oldSet && !additionsAllowed)
+      return;
+
     // if there is no fetched datum, perhaps there is a link
     if (!fetchSet && linkList.size() > 0) {
       fetch = linkList.get(0);
       fetchSet = true;
     }
-    
+
     // still no new data - record only unchanged old data, if exists, and return
     if (!fetchSet) {
       if (oldSet) {// at this point at least "old" should be present
         output.collect(key, old);
-        reporter.getCounter("CrawlDB status", CrawlDatum.getStatusName(old.getStatus())).increment(1);
+        reporter.getCounter("CrawlDB status",
+            CrawlDatum.getStatusName(old.getStatus())).increment(1);
       } else {
         LOG.warn("Missing fetch and old value, signature=" + signature);
       }
       return;
     }
-    
-    if (signature == null) signature = fetch.getSignature();
+
+    if (signature == null)
+      signature = fetch.getSignature();
     long prevModifiedTime = oldSet ? old.getModifiedTime() : 0L;
     long prevFetchTime = oldSet ? old.getFetchTime() : 0L;
 
@@ -177,31 +187,31 @@
         result.setModifiedTime(old.getModifiedTime());
       }
     }
-    
-    switch (fetch.getStatus()) {                // determine new status
 
-    case CrawlDatum.STATUS_LINKED:                // it was link
-      if (oldSet) {                          // if old exists
-        result.set(old);                          // use it
+    switch (fetch.getStatus()) { // determine new status
+
+    case CrawlDatum.STATUS_LINKED: // it was link
+      if (oldSet) { // if old exists
+        result.set(old); // use it
       } else {
-        result = schedule.initializeSchedule((Text)key, result);
+        result = schedule.initializeSchedule((Text) key, result);
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         try {
-          scfilters.initialScore((Text)key, result);
+          scfilters.initialScore((Text) key, result);
         } catch (ScoringFilterException e) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Cannot filter init score for url " + key +
-                     ", using default: " + e.getMessage());
+            LOG.warn("Cannot filter init score for url " + key
+                + ", using default: " + e.getMessage());
           }
           result.setScore(0.0f);
         }
       }
       break;
-      
-    case CrawlDatum.STATUS_FETCH_SUCCESS:         // succesful fetch
-    case CrawlDatum.STATUS_FETCH_REDIR_TEMP:      // successful fetch, redirected
+
+    case CrawlDatum.STATUS_FETCH_SUCCESS: // successful fetch
+    case CrawlDatum.STATUS_FETCH_REDIR_TEMP: // successful fetch, redirected
     case CrawlDatum.STATUS_FETCH_REDIR_PERM:
-    case CrawlDatum.STATUS_FETCH_NOTMODIFIED:     // successful fetch, notmodified
+    case CrawlDatum.STATUS_FETCH_NOTMODIFIED: // successful fetch, notmodified
       // determine the modification status
       int modified = FetchSchedule.STATUS_UNKNOWN;
       if (fetch.getStatus() == CrawlDatum.STATUS_FETCH_NOTMODIFIED) {
@@ -216,12 +226,14 @@
         }
       }
       // set the schedule
-      result = schedule.setFetchSchedule((Text)key, result, prevFetchTime,
-          prevModifiedTime, fetch.getFetchTime(), fetch.getModifiedTime(), modified);
+      result = schedule.setFetchSchedule((Text) key, result, prevFetchTime,
+          prevModifiedTime, fetch.getFetchTime(), fetch.getModifiedTime(),
+          modified);
       // set the result status and signature
       if (modified == FetchSchedule.STATUS_NOTMODIFIED) {
         result.setStatus(CrawlDatum.STATUS_DB_NOTMODIFIED);
-        if (oldSet) result.setSignature(old.getSignature());
+        if (oldSet)
+          result.setSignature(old.getSignature());
       } else {
         switch (fetch.getStatus()) {
         case CrawlDatum.STATUS_FETCH_SUCCESS:
@@ -234,34 +246,37 @@
           result.setStatus(CrawlDatum.STATUS_DB_REDIR_TEMP);
           break;
         default:
-          LOG.warn("Unexpected status: " + fetch.getStatus() + " resetting to old status.");
-          if (oldSet) result.setStatus(old.getStatus());
-          else result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
+          LOG.warn("Unexpected status: " + fetch.getStatus()
+              + " resetting to old status.");
+          if (oldSet)
+            result.setStatus(old.getStatus());
+          else
+            result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         }
         result.setSignature(signature);
         if (metaFromParse != null) {
-            for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
-              result.getMetaData().put(e.getKey(), e.getValue());
-            }
+          for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
+            result.getMetaData().put(e.getKey(), e.getValue());
           }
+        }
       }
       // if fetchInterval is larger than the system-wide maximum, trigger
       // an unconditional recrawl. This prevents the page to be stuck at
       // NOTMODIFIED state, when the old fetched copy was already removed with
       // old segments.
       if (maxInterval < result.getFetchInterval())
-        result = schedule.forceRefetch((Text)key, result, false);
+        result = schedule.forceRefetch((Text) key, result, false);
       break;
     case CrawlDatum.STATUS_SIGNATURE:
       if (LOG.isWarnEnabled()) {
         LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: " + key);
-      }   
+      }
       return;
-    case CrawlDatum.STATUS_FETCH_RETRY:           // temporary failure
+    case CrawlDatum.STATUS_FETCH_RETRY: // temporary failure
       if (oldSet) {
-        result.setSignature(old.getSignature());  // use old signature
+        result.setSignature(old.getSignature()); // use old signature
       }
-      result = schedule.setPageRetrySchedule((Text)key, result, prevFetchTime,
+      result = schedule.setPageRetrySchedule((Text) key, result, prevFetchTime,
           prevModifiedTime, fetch.getFetchTime());
       if (result.getRetriesSinceFetch() < retryMax) {
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
@@ -270,20 +285,22 @@
       }
       break;
 
-    case CrawlDatum.STATUS_FETCH_GONE:            // permanent failure
+    case CrawlDatum.STATUS_FETCH_GONE: // permanent failure
       if (oldSet)
-        result.setSignature(old.getSignature());  // use old signature
+        result.setSignature(old.getSignature()); // use old signature
       result.setStatus(CrawlDatum.STATUS_DB_GONE);
-      result = schedule.setPageGoneSchedule((Text)key, result, prevFetchTime,
+      result = schedule.setPageGoneSchedule((Text) key, result, prevFetchTime,
           prevModifiedTime, fetch.getFetchTime());
       break;
 
     default:
-      throw new RuntimeException("Unknown status: " + fetch.getStatus() + " " + key);
+      throw new RuntimeException("Unknown status: " + fetch.getStatus() + " "
+          + key);
     }
 
     try {
-      scfilters.updateDbScore((Text)key, oldSet ? old : null, result, linkList);
+      scfilters
+          .updateDbScore((Text) key, oldSet ? old : null, result, linkList);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("Couldn't update score, key=" + key + ": " + e);
@@ -292,22 +309,23 @@
     // remove generation time, if any
     result.getMetaData().remove(Nutch.WRITABLE_GENERATE_TIME_KEY);
     output.collect(key, result);
-    reporter.getCounter("CrawlDB status", CrawlDatum.getStatusName(result.getStatus())).increment(1);
+    reporter.getCounter("CrawlDB status",
+        CrawlDatum.getStatusName(result.getStatus())).increment(1);
   }
-  
+
 }
 
 class InlinkPriorityQueue extends PriorityQueue<CrawlDatum> {
-  
+
   public InlinkPriorityQueue(int maxSize) {
     initialize(maxSize);
   }
-  
+
   /** Determines the ordering of objects in this priority queue. **/
   protected boolean lessThan(Object arg0, Object arg1) {
     CrawlDatum candidate = (CrawlDatum) arg0;
     CrawlDatum least = (CrawlDatum) arg1;
     return candidate.getScore() > least.getScore();
   }
-  
+
 }
Index: src/java/org/apache/nutch/crawl/SignatureComparator.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureComparator.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/SignatureComparator.java	(working copy)
@@ -23,25 +23,34 @@
   public int compare(Object o1, Object o2) {
     return _compare(o1, o2);
   }
-  
+
   public static int _compare(Object o1, Object o2) {
-    if (o1 == null && o2 == null) return 0;
-    if (o1 == null) return -1;
-    if (o2 == null) return 1;
-    if (!(o1 instanceof byte[])) return -1;
-    if (!(o2 instanceof byte[])) return 1;
-    byte[] data1 = (byte[])o1;
-    byte[] data2 = (byte[])o2;
+    if (o1 == null && o2 == null)
+      return 0;
+    if (o1 == null)
+      return -1;
+    if (o2 == null)
+      return 1;
+    if (!(o1 instanceof byte[]))
+      return -1;
+    if (!(o2 instanceof byte[]))
+      return 1;
+    byte[] data1 = (byte[]) o1;
+    byte[] data2 = (byte[]) o2;
     return _compare(data1, 0, data1.length, data2, 0, data2.length);
   }
-  
-  public static int _compare(byte[] data1, int s1, int l1, byte[] data2, int s2, int l2) {
-    if (l2 > l1) return -1;
-    if (l2 < l1) return 1;
+
+  public static int _compare(byte[] data1, int s1, int l1, byte[] data2,
+      int s2, int l2) {
+    if (l2 > l1)
+      return -1;
+    if (l2 < l1)
+      return 1;
     int res = 0;
     for (int i = 0; i < l1; i++) {
       res = (data1[s1 + i] - data2[s2 + i]);
-      if (res != 0) return res;
+      if (res != 0)
+        return res;
     }
     return 0;
   }
Index: src/java/org/apache/nutch/crawl/SignatureFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/SignatureFactory.java	(working copy)
@@ -27,28 +27,30 @@
 
 /**
  * Factory class, which instantiates a Signature implementation according to the
- * current Configuration configuration. This newly created instance is cached in the
- * Configuration instance, so that it could be later retrieved.
+ * current Configuration. This newly created instance is cached in the
+ * Configuration instance, so that it can later be retrieved.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class SignatureFactory {
-  private static final Logger LOG = LoggerFactory.getLogger(SignatureFactory.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SignatureFactory.class);
 
-  private SignatureFactory() {}                   // no public ctor
+  private SignatureFactory() {
+  } // no public ctor
 
   /** Return the default Signature implementation. */
   public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    Signature impl = (Signature)objectCache.getObject(clazz);
+    Signature impl = (Signature) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         if (LOG.isInfoEnabled()) {
           LOG.info("Using Signature impl: " + clazz);
         }
         Class implClass = Class.forName(clazz);
-        impl = (Signature)implClass.newInstance();
+        impl = (Signature) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
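
The SignatureFactory Javadoc above describes the pattern this class (and
FetchScheduleFactory further below) relies on: resolve an implementation class
name from the configuration, instantiate it reflectively, and cache the
instance so that later lookups reuse it. A minimal sketch of that pattern,
with a plain HashMap standing in for Nutch's ObjectCache and illustrative
names:

    import java.util.HashMap;
    import java.util.Map;

    /** Sketch of the "instantiate by class name, then cache" pattern. */
    public class CachedFactory {

      private final Map<String, Object> cache = new HashMap<String, Object>();

      public synchronized <T> T get(String className, Class<T> type)
          throws Exception {
        Object impl = cache.get(className);
        if (impl == null) {
          impl = Class.forName(className).newInstance(); // reflective creation
          cache.put(className, impl);                    // reused on later calls
        }
        return type.cast(impl);
      }
    }

A caller would resolve the class name first, roughly as the factory does with
conf.get("db.signature.class", MD5Signature.class.getName()); the real factory
additionally calls setConf() on the freshly created instance before caching it.
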
Index: src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(working copy)
@@ -30,41 +30,48 @@
  * 
  * @author Andrzej Bialecki
  */
-public abstract class AbstractFetchSchedule extends Configured implements FetchSchedule {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractFetchSchedule.class);
-  
+public abstract class AbstractFetchSchedule extends Configured implements
+    FetchSchedule {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AbstractFetchSchedule.class);
+
   protected int defaultInterval;
   protected int maxInterval;
-  
+
   public AbstractFetchSchedule() {
     super(null);
   }
-  
+
   public AbstractFetchSchedule(Configuration conf) {
     super(conf);
   }
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     int oldDefaultInterval = conf.getInt("db.default.fetch.interval", 0);
     defaultInterval = conf.getInt("db.fetch.interval.default", 0);
-    if (oldDefaultInterval > 0 && defaultInterval == 0) defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
+    if (oldDefaultInterval > 0 && defaultInterval == 0)
+      defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
     int oldMaxInterval = conf.getInt("db.max.fetch.interval", 0);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
-    if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
+    if (oldMaxInterval > 0 && maxInterval == 0)
+      maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
     LOG.info("defaultInterval=" + defaultInterval);
     LOG.info("maxInterval=" + maxInterval);
   }
-  
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation sets the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
    * 
-   * @param url URL of the page.
-   * @param datum datum instance to be initialized (modified in place).
+   * @param url
+   *          URL of the page.
+   * @param datum
+   *          datum instance to be initialized (modified in place).
    */
   public CrawlDatum initializeSchedule(Text url, CrawlDatum datum) {
     datum.setFetchTime(System.currentTimeMillis());
@@ -72,91 +79,113 @@
     datum.setRetriesSinceFetch(0);
     return datum;
   }
-  
+
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page. NOTE: this implementation resets the
-   * retry counter - extending classes should call super.setFetchSchedule() to
+   * successfully fetched page. NOTE: this implementation resets the retry
+   * counter - extending classes should call super.setFetchSchedule() to
    * preserve this behavior.
    */
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state) {
     datum.setRetriesSinceFetch(0);
     return datum;
   }
-  
+
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
-   * @param datum datum instance to be adjusted
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance to be adjusted
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than <code>datum</code>,
+   *         but implementations should make sure that it contains at least
+   *         all information from <code>datum</code>.
    */
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
     // no page is truly GONE ... just increase the interval by 50%
     // and try much later.
     datum.setFetchInterval(datum.getFetchInterval() * 1.5f);
-    datum.setFetchTime(fetchTime + (long)datum.getFetchInterval() * 1000);
-    if (maxInterval < datum.getFetchInterval()) forceRefetch(url, datum, false);
+    datum.setFetchTime(fetchTime + (long) datum.getFetchInterval() * 1000);
+    if (maxInterval < datum.getFetchInterval())
+      forceRefetch(url, datum, false);
     return datum;
   }
-  
+
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases
-   * the retry counter.
-   * @param url URL of the page
-   * @param datum page information
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          page information
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than <code>datum</code>,
+   *         but implementations should make sure that it contains at least
+   *         all information from <code>datum</code>.
    */
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
-    datum.setFetchTime(fetchTime + (long)SECONDS_PER_DAY*1000);
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
+    datum.setFetchTime(fetchTime + (long) SECONDS_PER_DAY * 1000);
     datum.setRetriesSinceFetch(datum.getRetriesSinceFetch() + 1);
     return datum;
   }
-  
+
   /**
    * This method return the last fetch time of the CrawlDatum
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(CrawlDatum datum) {
-    return  datum.getFetchTime() - (long)datum.getFetchInterval() * 1000;
+    return datum.getFetchTime() - (long) datum.getFetchInterval() * 1000;
   }
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code>,
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information on whether the page is suitable for
+   * selection in the current fetchlist. NOTE: a true return value does not
+   * guarantee that the page will be fetched, it just allows it to be included
+   * in the further selection process based on scores. The default
+   * implementation checks <code>fetchTime</code>: if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will also
+   * check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(Text url, CrawlDatum datum, long curTime) {
     // pages are never truly GONE - we have to check them from time to time.
-    // pages with too long fetchInterval are adjusted so that they fit within
+    // pages with too long fetchInterval are adjusted so that they fit
+    // within
     // maximum fetchInterval (segment retention period).
     if (datum.getFetchTime() - curTime > (long) maxInterval * 1000) {
       if (datum.getFetchInterval() > maxInterval) {
@@ -165,21 +194,25 @@
       datum.setFetchTime(curTime);
     }
     if (datum.getFetchTime() > curTime) {
-      return false;                                   // not time yet
+      return false; // not time yet
     }
     return true;
   }
-  
+
   /**
    * This method resets fetchTime, fetchInterval, modifiedTime,
    * retriesSinceFetch and page signature, so that it forces refetching.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
-  public CrawlDatum  forceRefetch(Text url, CrawlDatum datum, boolean asap) {
+  public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap) {
     // reduce fetchInterval so that it fits within the max value
     if (datum.getFetchInterval() > maxInterval)
       datum.setFetchInterval(maxInterval * 0.9f);
@@ -187,7 +220,8 @@
     datum.setRetriesSinceFetch(0);
     datum.setSignature(null);
     datum.setModifiedTime(0L);
-    if (asap) datum.setFetchTime(System.currentTimeMillis());
+    if (asap)
+      datum.setFetchTime(System.currentTimeMillis());
     return datum;
   }
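
The method Javadocs above describe the default scheduling arithmetic: GONE
pages have their fetchInterval increased by 50% (with a forced refetch once it
exceeds maxInterval), transient failures are retried one day later with the
retry counter incremented, and shouldFetch() first pulls an overly remote
fetchTime back before comparing it against curTime. A condensed sketch of that
arithmetic (illustrative names; not the Nutch class, whose forceRefetch() also
resets the signature and modified time):

    /** Sketch of the default fetch-schedule arithmetic. */
    public class FetchScheduleSketch {
      static final long SECONDS_PER_DAY = 86400L;

      long fetchTime;                          // next scheduled fetch, epoch millis
      float fetchInterval;                     // seconds between fetches
      int retries;
      long maxInterval = 90 * SECONDS_PER_DAY; // e.g. value of db.fetch.interval.max

      /** Page reported GONE: back off by 50%, capped at maxInterval. */
      void pageGone(long now) {
        fetchInterval = fetchInterval * 1.5f;
        fetchTime = now + (long) fetchInterval * 1000;
        if (fetchInterval > maxInterval) {
          fetchInterval = maxInterval * 0.9f;  // what forceRefetch() would do
        }
      }

      /** Transient failure: retry in one day and count the attempt. */
      void pageRetry(long now) {
        fetchTime = now + SECONDS_PER_DAY * 1000;
        retries++;
      }

      /** Due for fetching? Overly remote fetch times are pulled back first. */
      boolean shouldFetch(long curTime) {
        if (fetchTime - curTime > maxInterval * 1000) {
          if (fetchInterval > maxInterval) {
            fetchInterval = maxInterval * 0.9f;
          }
          fetchTime = curTime;
        }
        return fetchTime <= curTime;
      }
    }
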
 
Index: src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(working copy)
@@ -25,20 +25,23 @@
 /** Creates and caches a {@link FetchSchedule} implementation. */
 public class FetchScheduleFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(FetchScheduleFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FetchScheduleFactory.class);
 
-  private FetchScheduleFactory() {}                   // no public ctor
+  private FetchScheduleFactory() {
+  } // no public ctor
 
   /** Return the FetchSchedule implementation. */
   public static FetchSchedule getFetchSchedule(Configuration conf) {
-    String clazz = conf.get("db.fetch.schedule.class", DefaultFetchSchedule.class.getName());
+    String clazz = conf.get("db.fetch.schedule.class",
+        DefaultFetchSchedule.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    FetchSchedule impl = (FetchSchedule)objectCache.getObject(clazz);
+    FetchSchedule impl = (FetchSchedule) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using FetchSchedule impl: " + clazz);
         Class implClass = Class.forName(clazz);
-        impl = (FetchSchedule)implClass.newInstance();
+        impl = (FetchSchedule) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/parse/ParsePluginsReader.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginsReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParsePluginsReader.java	(working copy)
@@ -42,50 +42,50 @@
 // Nutch imports
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /**
  * A reader to load the information stored in the
  * <code>$NUTCH_HOME/conf/parse-plugins.xml</code> file.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 class ParsePluginsReader {
-  
+
   /* our log stream */
-  public static final Logger LOG = LoggerFactory.getLogger(ParsePluginsReader.class);
-  
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ParsePluginsReader.class);
+
   /** The property name of the parse-plugins location */
   private static final String PP_FILE_PROP = "parse.plugin.file";
 
   /** the parse-plugins file */
   private String fParsePluginsFile = null;
 
-  
   /**
    * Constructs a new ParsePluginsReader
    */
-  public ParsePluginsReader() { }
-  
+  public ParsePluginsReader() {
+  }
+
   /**
    * Reads the <code>parse-plugins.xml</code> file and returns the
    * {@link #ParsePluginList} defined by it.
-   *
+   * 
    * @return A {@link #ParsePluginList} specified by the
    *         <code>parse-plugins.xml</code> file.
    * @throws Exception
-   *             If any parsing error occurs.
+   *           If any parsing error occurs.
    */
   public ParsePluginList parse(Configuration conf) {
-    
+
     ParsePluginList pList = new ParsePluginList();
-    
+
     // open up the XML file
     DocumentBuilderFactory factory = null;
     DocumentBuilder parser = null;
     Document document = null;
     InputSource inputSource = null;
-    
+
     InputStream ppInputStream = null;
     if (fParsePluginsFile != null) {
       URL parsePluginUrl = null;
@@ -94,56 +94,55 @@
         ppInputStream = parsePluginUrl.openStream();
       } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Unable to load parse plugins file from URL " +
-                   "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+          LOG.warn("Unable to load parse plugins file from URL " + "["
+              + fParsePluginsFile + "]. Reason is [" + e + "]");
         }
         return pList;
       }
     } else {
-      ppInputStream = conf.getConfResourceAsInputStream(
-                          conf.get(PP_FILE_PROP));
+      ppInputStream = conf.getConfResourceAsInputStream(conf.get(PP_FILE_PROP));
     }
-    
+
     inputSource = new InputSource(ppInputStream);
-    
+
     try {
       factory = DocumentBuilderFactory.newInstance();
       parser = factory.newDocumentBuilder();
       document = parser.parse(inputSource);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
-                 "Reason is [" + e + "]");
+        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." + "Reason is ["
+            + e + "]");
       }
       return null;
     }
-    
+
     Element parsePlugins = document.getDocumentElement();
-    
+
     // build up the alias hash map
     Map<String, String> aliases = getAliases(parsePlugins);
     // And store it on the parse plugin list
     pList.setAliases(aliases);
-     
+
     // get all the mime type nodes
     NodeList mimeTypes = parsePlugins.getElementsByTagName("mimeType");
-    
+
     // iterate through the mime types
     for (int i = 0; i < mimeTypes.getLength(); i++) {
       Element mimeType = (Element) mimeTypes.item(i);
       String mimeTypeStr = mimeType.getAttribute("name");
-      
+
       // for each mimeType, get the plugin list
       NodeList pluginList = mimeType.getElementsByTagName("plugin");
-      
+
       // iterate through the plugins, add them in order read
       // OR if they have a special order="" attribute, then hold those in
       // a separate list, and then insert them into the final list at the
       // order specified
       if (pluginList != null && pluginList.getLength() > 0) {
         List<String> plugList = new ArrayList<String>(pluginList.getLength());
-        
-        for (int j = 0; j<pluginList.getLength(); j++) {
+
+        for (int j = 0; j < pluginList.getLength(); j++) {
           Element plugin = (Element) pluginList.item(j);
           String pluginId = plugin.getAttribute("id");
           String extId = aliases.get(pluginId);
@@ -163,110 +162,110 @@
             plugList.add(extId);
           }
         }
-        
+
         // now add the plugin list and map it to this mimeType
         pList.setPluginList(mimeTypeStr, plugList);
-        
+
       } else if (LOG.isWarnEnabled()) {
         LOG.warn("ParsePluginsReader:ERROR:no plugins defined for mime type: "
-                 + mimeTypeStr + ", continuing parse");
+            + mimeTypeStr + ", continuing parse");
       }
     }
     return pList;
   }
-  
+
   /**
    * Tests parsing of the parse-plugins.xml file. An alternative name for the
-   * file can be specified via the <code>--file</code> option, although the
-   * file must be located in the <code>$NUTCH_HOME/conf</code> directory.
-   *
+   * file can be specified via the <code>--file</code> option, although the file
+   * must be located in the <code>$NUTCH_HOME/conf</code> directory.
+   * 
    * @param args
-   *            Currently only the --file argument to specify an alternative
-   *            name for the parse-plugins.xml file is supported.
+   *          Currently only the --file argument to specify an alternative name
+   *          for the parse-plugins.xml file is supported.
    */
   public static void main(String[] args) throws Exception {
     String parsePluginFile = null;
     String usage = "ParsePluginsReader [--file <parse plugin file location>]";
-    
-    if (( args.length != 0 && args.length != 2 )
+
+    if ((args.length != 0 && args.length != 2)
         || (args.length == 2 && !"--file".equals(args[0]))) {
       System.err.println(usage);
       System.exit(1);
     }
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("--file")) {
         parsePluginFile = args[++i];
       }
     }
-    
+
     ParsePluginsReader reader = new ParsePluginsReader();
-    
+
     if (parsePluginFile != null) {
       reader.setFParsePluginsFile(parsePluginFile);
     }
-    
+
     ParsePluginList prefs = reader.parse(NutchConfiguration.create());
-    
+
     for (String mimeType : prefs.getSupportedMimeTypes()) {
-      
+
       System.out.println("MIMETYPE: " + mimeType);
       List<String> plugList = prefs.getPluginList(mimeType);
-      
+
       System.out.println("EXTENSION IDs:");
-      
+
       for (String j : plugList) {
         System.out.println(j);
       }
     }
-    
+
   }
-  
+
   /**
    * @return Returns the fParsePluginsFile.
    */
   public String getFParsePluginsFile() {
     return fParsePluginsFile;
   }
-  
+
   /**
    * @param parsePluginsFile
-   *            The fParsePluginsFile to set.
+   *          The fParsePluginsFile to set.
    */
   public void setFParsePluginsFile(String parsePluginsFile) {
     fParsePluginsFile = parsePluginsFile;
   }
-  
+
   private Map<String, String> getAliases(Element parsePluginsRoot) {
 
     Map<String, String> aliases = new HashMap<String, String>();
     NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");
-	  
+
     if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("No aliases defined in parse-plugins.xml!");
       }
       return aliases;
     }
-	  
+
     if (aliasRoot.getLength() > 1) {
       // log a warning, but try and continue processing
       if (LOG.isWarnEnabled()) {
         LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
       }
     }
-	  
-    Element aliasRootElem = (Element)aliasRoot.item(0);
+
+    Element aliasRootElem = (Element) aliasRoot.item(0);
     NodeList aliasElements = aliasRootElem.getElementsByTagName("alias");
-	  
+
     if (aliasElements != null && aliasElements.getLength() > 0) {
-      for (int i=0; i<aliasElements.getLength(); i++) {
-        Element aliasElem = (Element)aliasElements.item(i);
-	String parsePluginId = aliasElem.getAttribute("name");
-	String extensionId = aliasElem.getAttribute("extension-id");
+      for (int i = 0; i < aliasElements.getLength(); i++) {
+        Element aliasElem = (Element) aliasElements.item(i);
+        String parsePluginId = aliasElem.getAttribute("name");
+        String extensionId = aliasElem.getAttribute("extension-id");
         if (LOG.isTraceEnabled()) {
-          LOG.trace("Found alias: plugin-id: " + parsePluginId +
-                    ", extension-id: " + extensionId);
+          LOG.trace("Found alias: plugin-id: " + parsePluginId
+              + ", extension-id: " + extensionId);
         }
         if (parsePluginId != null && extensionId != null) {
           aliases.put(parsePluginId, extensionId);
@@ -275,5 +274,5 @@
     }
     return aliases;
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParseData.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseData.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseData.java	(working copy)
@@ -30,8 +30,9 @@
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.util.NutchConfiguration;
 
-
-/** Data extracted from a page's content.
+/**
+ * Data extracted from a page's content.
+ * 
  * @see Parse#getData()
  */
 public final class ParseData extends VersionedWritable {
@@ -45,19 +46,19 @@
   private Metadata parseMeta;
   private ParseStatus status;
   private byte version = VERSION;
-  
+
   public ParseData() {
     contentMeta = new Metadata();
     parseMeta = new Metadata();
   }
 
   public ParseData(ParseStatus status, String title, Outlink[] outlinks,
-                   Metadata contentMeta) {
+      Metadata contentMeta) {
     this(status, title, outlinks, contentMeta, new Metadata());
   }
-  
+
   public ParseData(ParseStatus status, String title, Outlink[] outlinks,
-                   Metadata contentMeta, Metadata parseMeta) {
+      Metadata contentMeta, Metadata parseMeta) {
     this.status = status;
     this.title = title;
     this.outlinks = outlinks;
@@ -70,33 +71,43 @@
   //
 
   /** The status of parsing the page. */
-  public ParseStatus getStatus() { return status; }
-  
+  public ParseStatus getStatus() {
+    return status;
+  }
+
   /** The title of the page. */
-  public String getTitle() { return title; }
+  public String getTitle() {
+    return title;
+  }
 
   /** The outlinks of the page. */
-  public Outlink[] getOutlinks() { return outlinks; }
+  public Outlink[] getOutlinks() {
+    return outlinks;
+  }
 
   /** The original Metadata retrieved from content */
-  public Metadata getContentMeta() { return contentMeta; }
+  public Metadata getContentMeta() {
+    return contentMeta;
+  }
 
   /**
-   * Other content properties.
-   * This is the place to find format-specific properties.
-   * Different parser implementations for different content types will populate
-   * this differently.
+   * Other content properties. This is the place to find format-specific
+   * properties. Different parser implementations for different content types
+   * will populate this differently.
    */
-  public Metadata getParseMeta() { return parseMeta; }
-  
+  public Metadata getParseMeta() {
+    return parseMeta;
+  }
+
   public void setParseMeta(Metadata parseMeta) {
     this.parseMeta = parseMeta;
   }
-  
+
   /**
-   * Get a metadata single value.
-   * This method first looks for the metadata value in the parse metadata. If no
-   * value is found it the looks for the metadata in the content metadata.
+   * Get a single metadata value. This method first looks for the metadata
+   * value in the parse metadata. If no value is found it then looks for the
+   * metadata in the content metadata.
+   * 
    * @see #getContentMeta()
    * @see #getParseMeta()
    */
@@ -107,12 +118,14 @@
     }
     return value;
   }
-  
+
   //
   // Writable methods
   //
 
-  public byte getVersion() { return version; }
+  public byte getVersion() {
+    return version;
+  }
 
   public final void readFields(DataInput in) throws IOException {
 
@@ -121,16 +134,16 @@
     if (version != VERSION)
       throw new VersionMismatchException(VERSION, version);
     status = ParseStatus.read(in);
-    title = Text.readString(in);                   // read title
+    title = Text.readString(in); // read title
 
-    int numOutlinks = in.readInt();    
+    int numOutlinks = in.readInt();
     outlinks = new Outlink[numOutlinks];
     for (int i = 0; i < numOutlinks; i++) {
       outlinks[i] = Outlink.read(in);
     }
-    
+
     if (version < 3) {
-      int propertyCount = in.readInt();             // read metadata
+      int propertyCount = in.readInt(); // read metadata
       contentMeta.clear();
       for (int i = 0; i < propertyCount; i++) {
         contentMeta.add(Text.readString(in), Text.readString(in));
@@ -146,15 +159,15 @@
   }
 
   public final void write(DataOutput out) throws IOException {
-    out.writeByte(VERSION);                       // write version
-    status.write(out);                            // write status
-    Text.writeString(out, title);                 // write title
+    out.writeByte(VERSION); // write version
+    status.write(out); // write status
+    Text.writeString(out, title); // write title
 
-    out.writeInt(outlinks.length);                // write outlinks
+    out.writeInt(outlinks.length); // write outlinks
     for (int i = 0; i < outlinks.length; i++) {
       outlinks[i].write(out);
     }
-    contentMeta.write(out);                      // write content metadata
+    contentMeta.write(out); // write content metadata
     parseMeta.write(out);
   }
 
@@ -171,38 +184,36 @@
   public boolean equals(Object o) {
     if (!(o instanceof ParseData))
       return false;
-    ParseData other = (ParseData)o;
-    return
-      this.status.equals(other.status) &&
-      this.title.equals(other.title) &&
-      Arrays.equals(this.outlinks, other.outlinks) &&
-      this.contentMeta.equals(other.contentMeta) &&
-      this.parseMeta.equals(other.parseMeta);
+    ParseData other = (ParseData) o;
+    return this.status.equals(other.status) && this.title.equals(other.title)
+        && Arrays.equals(this.outlinks, other.outlinks)
+        && this.contentMeta.equals(other.contentMeta)
+        && this.parseMeta.equals(other.parseMeta);
   }
 
   public String toString() {
     StringBuffer buffer = new StringBuffer();
 
-    buffer.append("Version: " + version + "\n" );
-    buffer.append("Status: " + status + "\n" );
-    buffer.append("Title: " + title + "\n" );
+    buffer.append("Version: " + version + "\n");
+    buffer.append("Status: " + status + "\n");
+    buffer.append("Title: " + title + "\n");
 
     if (outlinks != null) {
-      buffer.append("Outlinks: " + outlinks.length + "\n" );
+      buffer.append("Outlinks: " + outlinks.length + "\n");
       for (int i = 0; i < outlinks.length; i++) {
         buffer.append("  outlink: " + outlinks[i] + "\n");
       }
     }
 
-    buffer.append("Content Metadata: " + contentMeta + "\n" );
-    buffer.append("Parse Metadata: " + parseMeta + "\n" );
+    buffer.append("Content Metadata: " + contentMeta + "\n");
+    buffer.append("Parse Metadata: " + parseMeta + "\n");
 
     return buffer.toString();
   }
 
   public static void main(String argv[]) throws Exception {
     String usage = "ParseData (-local | -dfs <namenode:port>) recno segment";
-    
+
     if (argv.length < 3) {
       System.out.println("usage:" + usage);
       return;
@@ -210,13 +221,12 @@
 
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
     FileSystem fs = FileSystem.get(conf);
-    
+
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
       String segment = remainingArgs[1];
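
A brief sketch of the metadata lookup order described in the javadoc above (parse metadata first, content metadata as fallback, via ParseData#getMeta); the class name and the charset values are purely illustrative.

import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseStatus;

public class ParseDataMetaSketch {
  public static void main(String[] args) {
    Metadata contentMeta = new Metadata();
    contentMeta.add("charset", "ISO-8859-1");

    Metadata parseMeta = new Metadata();
    parseMeta.add("charset", "UTF-8");

    ParseData data = new ParseData(ParseStatus.STATUS_SUCCESS, "A title",
        new Outlink[0], contentMeta, parseMeta);

    // The parse metadata wins over the content metadata, so this prints UTF-8.
    System.out.println(data.getMeta("charset"));
  }
}
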
Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	(working copy)
@@ -45,25 +45,26 @@
 
 /* Parse content in a segment. */
 public class ParseOutputFormat implements OutputFormat<Text, Parse> {
-  private static final Logger LOG = LoggerFactory.getLogger(ParseOutputFormat.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(ParseOutputFormat.class);
 
   private URLFilters filters;
   private URLNormalizers normalizers;
   private ScoringFilters scfilters;
-  
+
   private static class SimpleEntry implements Entry<Text, CrawlDatum> {
     private Text key;
     private CrawlDatum value;
-    
+
     public SimpleEntry(Text key, CrawlDatum value) {
       this.key = key;
       this.value = value;
     }
-    
+
     public Text getKey() {
       return key;
     }
-    
+
     public CrawlDatum getValue() {
       return value;
     }
@@ -75,215 +76,219 @@
   }
 
   public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
-      Path out = FileOutputFormat.getOutputPath(job);
-      if ((out == null) && (job.getNumReduceTasks() != 0)) {
-          throw new InvalidJobConfException(
-                  "Output directory not set in JobConf.");
-      }
-      if (fs == null) {
-          fs = out.getFileSystem(job);
-      }
-      if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
-          throw new IOException("Segment already parsed!");
+    Path out = FileOutputFormat.getOutputPath(job);
+    if ((out == null) && (job.getNumReduceTasks() != 0)) {
+      throw new InvalidJobConfException("Output directory not set in JobConf.");
+    }
+    if (fs == null) {
+      fs = out.getFileSystem(job);
+    }
+    if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
+      throw new IOException("Segment already parsed!");
   }
 
   public RecordWriter<Text, Parse> getRecordWriter(FileSystem fs, JobConf job,
-                                      String name, Progressable progress) throws IOException {
+      String name, Progressable progress) throws IOException {
 
     this.filters = new URLFilters(job);
     this.normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_OUTLINK);
     this.scfilters = new ScoringFilters(job);
     final int interval = job.getInt("db.fetch.interval.default", 2592000);
-    final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false);
+    final boolean ignoreExternalLinks = job.getBoolean(
+        "db.ignore.external.links", false);
     int maxOutlinksPerPage = job.getInt("db.max.outlinks.per.page", 100);
     final int maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
-                                                     : maxOutlinksPerPage;
-    final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);
+        : maxOutlinksPerPage;
+    final CompressionType compType = SequenceFileOutputFormat
+        .getOutputCompressionType(job);
     Path out = FileOutputFormat.getOutputPath(job);
-    
+
     Path text = new Path(new Path(out, ParseText.DIR_NAME), name);
     Path data = new Path(new Path(out, ParseData.DIR_NAME), name);
     Path crawl = new Path(new Path(out, CrawlDatum.PARSE_DIR_NAME), name);
-    
-    final String[] parseMDtoCrawlDB = job.get("db.parsemeta.to.crawldb","").split(" *, *");
-    
-    final MapFile.Writer textOut =
-      new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class,
-          CompressionType.RECORD, progress);
-    
-    final MapFile.Writer dataOut =
-      new MapFile.Writer(job, fs, data.toString(), Text.class, ParseData.class,
-          compType, progress);
-    
-    final SequenceFile.Writer crawlOut =
-      SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class,
-          compType, progress);
-    
+
+    final String[] parseMDtoCrawlDB = job.get("db.parsemeta.to.crawldb", "")
+        .split(" *, *");
+
+    final MapFile.Writer textOut = new MapFile.Writer(job, fs, text.toString(),
+        Text.class, ParseText.class, CompressionType.RECORD, progress);
+
+    final MapFile.Writer dataOut = new MapFile.Writer(job, fs, data.toString(),
+        Text.class, ParseData.class, compType, progress);
+
+    final SequenceFile.Writer crawlOut = SequenceFile.createWriter(fs, job,
+        crawl, Text.class, CrawlDatum.class, compType, progress);
+
     return new RecordWriter<Text, Parse>() {
 
+      public void write(Text key, Parse parse) throws IOException {
 
-        public void write(Text key, Parse parse)
-          throws IOException {
-          
-          String fromUrl = key.toString();
-          String fromHost = null; 
-          String toHost = null;          
-          textOut.append(key, new ParseText(parse.getText()));
-          
-          ParseData parseData = parse.getData();
-          // recover the signature prepared by Fetcher or ParseSegment
-          String sig = parseData.getContentMeta().get(Nutch.SIGNATURE_KEY);
-          if (sig != null) {
-            byte[] signature = StringUtil.fromHexString(sig);
-            if (signature != null) {
-              // append a CrawlDatum with a signature
-              CrawlDatum d = new CrawlDatum(CrawlDatum.STATUS_SIGNATURE, 0);
-              d.setSignature(signature);
-              crawlOut.append(key, d);
-            }
+        String fromUrl = key.toString();
+        String fromHost = null;
+        String toHost = null;
+        textOut.append(key, new ParseText(parse.getText()));
+
+        ParseData parseData = parse.getData();
+        // recover the signature prepared by Fetcher or ParseSegment
+        String sig = parseData.getContentMeta().get(Nutch.SIGNATURE_KEY);
+        if (sig != null) {
+          byte[] signature = StringUtil.fromHexString(sig);
+          if (signature != null) {
+            // append a CrawlDatum with a signature
+            CrawlDatum d = new CrawlDatum(CrawlDatum.STATUS_SIGNATURE, 0);
+            d.setSignature(signature);
+            crawlOut.append(key, d);
           }
-          
+        }
+
         // see if the parse metadata contain things that we'd like
         // to pass to the metadata of the crawlDB entry
         CrawlDatum parseMDCrawlDatum = null;
         for (String mdname : parseMDtoCrawlDB) {
           String mdvalue = parse.getData().getParseMeta().get(mdname);
           if (mdvalue != null) {
-            if (parseMDCrawlDatum == null) parseMDCrawlDatum = new CrawlDatum(
-                CrawlDatum.STATUS_PARSE_META, 0);
+            if (parseMDCrawlDatum == null)
+              parseMDCrawlDatum = new CrawlDatum(CrawlDatum.STATUS_PARSE_META,
+                  0);
             parseMDCrawlDatum.getMetaData().put(new Text(mdname),
                 new Text(mdvalue));
           }
         }
-        if (parseMDCrawlDatum != null) crawlOut.append(key, parseMDCrawlDatum);
+        if (parseMDCrawlDatum != null)
+          crawlOut.append(key, parseMDCrawlDatum);
 
-          try {
-            ParseStatus pstatus = parseData.getStatus();
-            if (pstatus != null && pstatus.isSuccess() &&
-                pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
-              String newUrl = pstatus.getMessage();
-              int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
-              try {
-                newUrl = normalizers.normalize(newUrl,
-                    URLNormalizers.SCOPE_FETCHER);
-              } catch (MalformedURLException mfue) {
-                newUrl = null;
+        try {
+          ParseStatus pstatus = parseData.getStatus();
+          if (pstatus != null && pstatus.isSuccess()
+              && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+            String newUrl = pstatus.getMessage();
+            int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
+            try {
+              newUrl = normalizers.normalize(newUrl,
+                  URLNormalizers.SCOPE_FETCHER);
+            } catch (MalformedURLException mfue) {
+              newUrl = null;
+            }
+            if (newUrl != null)
+              newUrl = filters.filter(newUrl);
+            String url = key.toString();
+            if (newUrl != null && !newUrl.equals(url)) {
+              String reprUrl = URLUtil.chooseRepr(url, newUrl,
+                  refreshTime < Fetcher.PERM_REFRESH_TIME);
+              CrawlDatum newDatum = new CrawlDatum();
+              newDatum.setStatus(CrawlDatum.STATUS_LINKED);
+              if (reprUrl != null && !reprUrl.equals(newUrl)) {
+                newDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY,
+                    new Text(reprUrl));
               }
-              if (newUrl != null) newUrl = filters.filter(newUrl);
-              String url = key.toString();
-              if (newUrl != null && !newUrl.equals(url)) {
-                String reprUrl =
-                  URLUtil.chooseRepr(url, newUrl,
-                                     refreshTime < Fetcher.PERM_REFRESH_TIME);
-                CrawlDatum newDatum = new CrawlDatum();
-                newDatum.setStatus(CrawlDatum.STATUS_LINKED);
-                if (reprUrl != null && !reprUrl.equals(newUrl)) {
-                  newDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY,
-                                             new Text(reprUrl));
-                }
-                crawlOut.append(new Text(newUrl), newDatum);
-              }
+              crawlOut.append(new Text(newUrl), newDatum);
             }
-          } catch (URLFilterException e) {
-            // ignore
           }
+        } catch (URLFilterException e) {
+          // ignore
+        }
 
-          // collect outlinks for subsequent db update
-          Outlink[] links = parseData.getOutlinks();
-          int outlinksToStore = Math.min(maxOutlinks, links.length);
+        // collect outlinks for subsequent db update
+        Outlink[] links = parseData.getOutlinks();
+        int outlinksToStore = Math.min(maxOutlinks, links.length);
+        if (ignoreExternalLinks) {
+          try {
+            fromHost = new URL(fromUrl).getHost().toLowerCase();
+          } catch (MalformedURLException e) {
+            fromHost = null;
+          }
+        } else {
+          fromHost = null;
+        }
+
+        int validCount = 0;
+        CrawlDatum adjust = null;
+        List<Entry<Text, CrawlDatum>> targets = new ArrayList<Entry<Text, CrawlDatum>>(
+            outlinksToStore);
+        List<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
+        for (int i = 0; i < links.length && validCount < outlinksToStore; i++) {
+          String toUrl = links[i].getToUrl();
+          // ignore links to self (or anchors within the page)
+          if (fromUrl.equals(toUrl)) {
+            continue;
+          }
           if (ignoreExternalLinks) {
             try {
-              fromHost = new URL(fromUrl).getHost().toLowerCase();
+              toHost = new URL(toUrl).getHost().toLowerCase();
             } catch (MalformedURLException e) {
-              fromHost = null;
+              toHost = null;
             }
-          } else {
-            fromHost = null;
+            if (toHost == null || !toHost.equals(fromHost)) { // external links
+              continue; // skip it
+            }
           }
-
-          int validCount = 0;
-          CrawlDatum adjust = null;
-          List<Entry<Text, CrawlDatum>> targets = new ArrayList<Entry<Text, CrawlDatum>>(outlinksToStore);
-          List<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
-          for (int i = 0; i < links.length && validCount < outlinksToStore; i++) {
-            String toUrl = links[i].getToUrl();
-            // ignore links to self (or anchors within the page)
-            if (fromUrl.equals(toUrl)) {
+          try {
+            toUrl = normalizers.normalize(toUrl, URLNormalizers.SCOPE_OUTLINK); // normalize
+                                                                                // the
+                                                                                // url
+            toUrl = filters.filter(toUrl); // filter the url
+            if (toUrl == null) {
               continue;
             }
-            if (ignoreExternalLinks) {
-              try {
-                toHost = new URL(toUrl).getHost().toLowerCase();
-              } catch (MalformedURLException e) {
-                toHost = null;
-              }
-              if (toHost == null || !toHost.equals(fromHost)) { // external links
-                continue; // skip it
-              }
-            }
-            try {
-              toUrl = normalizers.normalize(toUrl,
-                          URLNormalizers.SCOPE_OUTLINK); // normalize the url
-              toUrl = filters.filter(toUrl);   // filter the url
-              if (toUrl == null) {
-                continue;
-              }
-            } catch (Exception e) {
-              continue;
-            }
-            CrawlDatum target = new CrawlDatum(CrawlDatum.STATUS_LINKED, interval);
-            Text targetUrl = new Text(toUrl);
-            try {
-              scfilters.initialScore(targetUrl, target);
-            } catch (ScoringFilterException e) {
-              LOG.warn("Cannot filter init score for url " + key +
-                       ", using default: " + e.getMessage());
-              target.setScore(0.0f);
-            }
-            
-            targets.add(new SimpleEntry(targetUrl, target));
-            outlinkList.add(links[i]);
-            validCount++;
+          } catch (Exception e) {
+            continue;
           }
+          CrawlDatum target = new CrawlDatum(CrawlDatum.STATUS_LINKED, interval);
+          Text targetUrl = new Text(toUrl);
           try {
-            // compute score contributions and adjustment to the original score
-            adjust = scfilters.distributeScoreToOutlinks((Text)key, parseData, 
-                      targets, null, links.length);
+            scfilters.initialScore(targetUrl, target);
           } catch (ScoringFilterException e) {
-            LOG.warn("Cannot distribute score from " + key + ": " + e.getMessage());
+            LOG.warn("Cannot filter init score for url " + key
+                + ", using default: " + e.getMessage());
+            target.setScore(0.0f);
           }
-          for (Entry<Text, CrawlDatum> target : targets) {
-            crawlOut.append(target.getKey(), target.getValue());
-          }
-          if (adjust != null) crawlOut.append(key, adjust);
 
-          Outlink[] filteredLinks = outlinkList.toArray(new Outlink[outlinkList.size()]);
-          parseData = new ParseData(parseData.getStatus(), parseData.getTitle(), 
-                                    filteredLinks, parseData.getContentMeta(), 
-                                    parseData.getParseMeta());
-          dataOut.append(key, parseData);
-          if (!parse.isCanonical()) {
-            CrawlDatum datum = new CrawlDatum();
-            datum.setStatus(CrawlDatum.STATUS_FETCH_SUCCESS);
-            String timeString = parse.getData().getContentMeta().get(Nutch.FETCH_TIME_KEY);
-            try {
-              datum.setFetchTime(Long.parseLong(timeString));
-            } catch (Exception e) {
-              LOG.warn("Can't read fetch time for: " + key);
-              datum.setFetchTime(System.currentTimeMillis());
-            }
-            crawlOut.append(key, datum);
+          targets.add(new SimpleEntry(targetUrl, target));
+          outlinkList.add(links[i]);
+          validCount++;
+        }
+        try {
+          // compute score contributions and adjustment to the original score
+          adjust = scfilters.distributeScoreToOutlinks((Text) key, parseData,
+              targets, null, links.length);
+        } catch (ScoringFilterException e) {
+          LOG.warn("Cannot distribute score from " + key + ": "
+              + e.getMessage());
+        }
+        for (Entry<Text, CrawlDatum> target : targets) {
+          crawlOut.append(target.getKey(), target.getValue());
+        }
+        if (adjust != null)
+          crawlOut.append(key, adjust);
+
+        Outlink[] filteredLinks = outlinkList.toArray(new Outlink[outlinkList
+            .size()]);
+        parseData = new ParseData(parseData.getStatus(), parseData.getTitle(),
+            filteredLinks, parseData.getContentMeta(), parseData.getParseMeta());
+        dataOut.append(key, parseData);
+        if (!parse.isCanonical()) {
+          CrawlDatum datum = new CrawlDatum();
+          datum.setStatus(CrawlDatum.STATUS_FETCH_SUCCESS);
+          String timeString = parse.getData().getContentMeta()
+              .get(Nutch.FETCH_TIME_KEY);
+          try {
+            datum.setFetchTime(Long.parseLong(timeString));
+          } catch (Exception e) {
+            LOG.warn("Can't read fetch time for: " + key);
+            datum.setFetchTime(System.currentTimeMillis());
           }
+          crawlOut.append(key, datum);
         }
-        
-        public void close(Reporter reporter) throws IOException {
-          textOut.close();
-          dataOut.close();
-          crawlOut.close();
-        }
-        
-      };
-    
+      }
+
+      public void close(Reporter reporter) throws IOException {
+        textOut.close();
+        dataOut.close();
+        crawlOut.close();
+      }
+
+    };
+
   }
 
 }
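
As a standalone illustration of the db.ignore.external.links check performed inside the record writer above (hosts are lower-cased before comparison, and outlinks whose URL cannot be parsed are skipped), here is a sketch using plain java.net.URL; the helper and its class name are hypothetical.

import java.net.MalformedURLException;
import java.net.URL;

public class ExternalLinkCheckSketch {

  // Mirrors the host comparison in ParseOutputFormat: an outlink is kept only
  // when its lower-cased host equals the lower-cased host of the source page.
  static boolean isInternal(String fromUrl, String toUrl) {
    try {
      String fromHost = new URL(fromUrl).getHost().toLowerCase();
      String toHost = new URL(toUrl).getHost().toLowerCase();
      return toHost.equals(fromHost);
    } catch (MalformedURLException e) {
      return false; // unparsable URLs are treated as external and skipped
    }
  }

  public static void main(String[] args) {
    System.out.println(isInternal("http://example.com/a", "http://EXAMPLE.com/b")); // true
    System.out.println(isInternal("http://example.com/a", "http://other.org/"));    // false
  }
}
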
Index: src/java/org/apache/nutch/parse/HtmlParseFilter.java
===================================================================
--- src/java/org/apache/nutch/parse/HtmlParseFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/HtmlParseFilter.java	(working copy)
@@ -27,16 +27,19 @@
 import org.apache.nutch.plugin.Pluggable;
 import org.apache.nutch.protocol.Content;
 
-
-/** Extension point for DOM-based HTML parsers.  Permits one to add additional
- * metadata to HTML parses.  All plugins found which implement this extension
+/**
+ * Extension point for DOM-based HTML parsers. Permits one to add additional
+ * metadata to HTML parses. All plugins found which implement this extension
  * point are run sequentially on the parse.
  */
 public interface HtmlParseFilter extends Pluggable, Configurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = HtmlParseFilter.class.getName();
 
-  /** Adds metadata or otherwise modifies a parse of HTML content, given
-   * the DOM tree of a page. */
-  ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc);
+  /**
+   * Adds metadata or otherwise modifies a parse of HTML content, given the DOM
+   * tree of a page.
+   */
+  ParseResult filter(Content content, ParseResult parseResult,
+      HTMLMetaTags metaTags, DocumentFragment doc);
 }
Index: src/java/org/apache/nutch/parse/ParseUtil.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseUtil.java	(working copy)
@@ -27,124 +27,133 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.protocol.Content;
 
-
 /**
  * A Utility class containing methods to simply perform parsing utilities such
  * as iterating through a preferred list of {@link Parser}s to obtain
  * {@link Parse} objects.
- *
+ * 
  * @author mattmann
  * @author J&eacute;r&ocirc;me Charron
  * @author S&eacute;bastien Le Callonnec
  */
 public class ParseUtil {
-  
+
   /* our log stream */
   public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);
   private ParserFactory parserFactory;
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
   private int MAX_PARSE_TIME = 30;
-  
+
   /**
    * 
    * @param conf
    */
   public ParseUtil(Configuration conf) {
     this.parserFactory = new ParserFactory(conf);
-    MAX_PARSE_TIME=conf.getInt("parser.timeout", 30);
+    MAX_PARSE_TIME = conf.getInt("parser.timeout", 30);
   }
-  
+
   /**
    * Performs a parse by iterating through a List of preferred {@link Parser}s
    * until a successful parse is performed and a {@link Parse} object is
    * returned. If the parse is unsuccessful, a message is logged to the
    * <code>WARNING</code> level, and an empty parse is returned.
-   *
-   * @param content The content to try and parse.
+   * 
+   * @param content
+   *          The content to try and parse.
    * @return &lt;key, {@link Parse}&gt; pairs.
-   * @throws ParseException If no suitable parser is found to perform the parse.
+   * @throws ParseException
+   *           If no suitable parser is found to perform the parse.
    */
   public ParseResult parse(Content content) throws ParseException {
     Parser[] parsers = null;
-    
+
     try {
-      parsers = this.parserFactory.getParsers(content.getContentType(), 
-	         content.getUrl() != null ? content.getUrl():"");
+      parsers = this.parserFactory.getParsers(content.getContentType(),
+          content.getUrl() != null ? content.getUrl() : "");
     } catch (ParserNotFound e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
-               " of type " + content.getContentType());
+        LOG.warn("No suitable parser found when trying to parse content "
+            + content.getUrl() + " of type " + content.getContentType());
       }
       throw new ParseException(e.getMessage());
     }
-    
+
     ParseResult parseResult = null;
-    for (int i=0; i<parsers.length; i++) {
+    for (int i = 0; i < parsers.length; i++) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
+        LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i]
+            + "]");
       }
-      if (MAX_PARSE_TIME!=-1)
-      	parseResult = runParser(parsers[i], content);
-      else 
-      	parseResult = parsers[i].getParse(content);
+      if (MAX_PARSE_TIME != -1)
+        parseResult = runParser(parsers[i], content);
+      else
+        parseResult = parsers[i].getParse(content);
 
       if (parseResult != null && !parseResult.isEmpty())
         return parseResult;
     }
-   
-    if (LOG.isWarnEnabled()) { 
-      LOG.warn("Unable to successfully parse content " + content.getUrl() +
-               " of type " + content.getContentType());
+
+    if (LOG.isWarnEnabled()) {
+      LOG.warn("Unable to successfully parse content " + content.getUrl()
+          + " of type " + content.getContentType());
     }
-    return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
+    return new ParseStatus(new ParseException(
+        "Unable to successfully parse content")).getEmptyParseResult(
+        content.getUrl(), null);
   }
-    
+
   /**
    * Method parses a {@link Content} object using the {@link Parser} specified
-   * by the parameter <code>extId</code>, i.e., the Parser's extension ID.
-   * If a suitable {@link Parser} is not found, then a <code>WARNING</code>
-   * level message is logged, and a ParseException is thrown. If the parse is
-   * uncessful for any other reason, then a <code>WARNING</code> level
-   * message is logged, and a <code>ParseStatus.getEmptyParse()</code> is
-   * returned.
-   *
-   * @param extId The extension implementation ID of the {@link Parser} to use
-   *              to parse the specified content.
-   * @param content The content to parse.
-   *
-   * @return &lt;key, {@link Parse}&gt; pairs if the parse is successful, otherwise,
-   *         a single &lt;key, <code>ParseStatus.getEmptyParse()</code>&gt; pair.
-   *
-   * @throws ParseException If there is no suitable {@link Parser} found
-   *                        to perform the parse.
+   * by the parameter <code>extId</code>, i.e., the Parser's extension ID. If a
+   * suitable {@link Parser} is not found, then a <code>WARNING</code> level
+   * message is logged, and a ParseException is thrown. If the parse is
+   * unsuccessful for any other reason, then a <code>WARNING</code> level message
+   * is logged, and a <code>ParseStatus.getEmptyParse()</code> is returned.
+   * 
+   * @param extId
+   *          The extension implementation ID of the {@link Parser} to use to
+   *          parse the specified content.
+   * @param content
+   *          The content to parse.
+   * 
+   * @return &lt;key, {@link Parse}&gt; pairs if the parse is successful,
+   *         otherwise, a single &lt;key,
+   *         <code>ParseStatus.getEmptyParse()</code>&gt; pair.
+   * 
+   * @throws ParseException
+   *           If there is no suitable {@link Parser} found to perform the
+   *           parse.
    */
   public ParseResult parseByExtensionId(String extId, Content content)
-  throws ParseException {
+      throws ParseException {
     Parser p = null;
-    
+
     try {
       p = this.parserFactory.getParserById(extId);
     } catch (ParserNotFound e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
-            " of type " + content.getContentType());
+        LOG.warn("No suitable parser found when trying to parse content "
+            + content.getUrl() + " of type " + content.getContentType());
       }
       throw new ParseException(e.getMessage());
     }
-    
+
     ParseResult parseResult = null;
-    if (MAX_PARSE_TIME!=-1)
-    	parseResult = runParser(p, content);
-    else 
-    	parseResult = p.getParse(content);
+    if (MAX_PARSE_TIME != -1)
+      parseResult = runParser(p, content);
+    else
+      parseResult = p.getParse(content);
     if (parseResult != null && !parseResult.isEmpty()) {
       return parseResult;
     } else {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to successfully parse content " + content.getUrl() +
-            " of type " + content.getContentType());
-      }  
-      return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
+        LOG.warn("Unable to successfully parse content " + content.getUrl()
+            + " of type " + content.getContentType());
+      }
+      return new ParseStatus(new ParseException(
+          "Unable to successfully parse content")).getEmptyParseResult(
+          content.getUrl(), null);
     }
   }
 
@@ -168,5 +177,4 @@
     }
     return res;
   }
-  
 }
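
A minimal usage sketch of the utility above, assuming the HTML parser plugin is available on the plugin path; the URL, the HTML snippet and the sketch class name are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.NutchConfiguration;

public class ParseUtilSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    // parser.timeout (seconds) bounds each parser run; -1 disables the limit.
    conf.setInt("parser.timeout", 30);

    byte[] html = "<html><body>Hello Nutch</body></html>".getBytes("UTF-8");
    Content content = new Content("http://example.com/", "http://example.com/",
        html, "text/html", new Metadata(), conf);

    // Iterates over the parsers registered for text/html until one succeeds.
    ParseResult result = new ParseUtil(conf).parse(content);
    System.out.println(result.get("http://example.com/").getText());
  }
}
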
Index: src/java/org/apache/nutch/parse/ParserNotFound.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserNotFound.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParserNotFound.java	(working copy)
@@ -18,17 +18,17 @@
 
 public class ParserNotFound extends ParseException {
 
-  private static final long serialVersionUID=23993993939L;
+  private static final long serialVersionUID = 23993993939L;
   private String url;
   private String contentType;
 
-  public ParserNotFound(String message){
-    super(message);    
+  public ParserNotFound(String message) {
+    super(message);
   }
-  
+
   public ParserNotFound(String url, String contentType) {
-    this(url, contentType,
-         "parser not found for contentType="+contentType+" url="+url);
+    this(url, contentType, "parser not found for contentType=" + contentType
+        + " url=" + url);
   }
 
   public ParserNotFound(String url, String contentType, String message) {
@@ -37,6 +37,11 @@
     this.contentType = contentType;
   }
 
-  public String getUrl() { return url; }
-  public String getContentType() { return contentType; }
+  public String getUrl() {
+    return url;
+  }
+
+  public String getContentType() {
+    return contentType;
+  }
 }
Index: src/java/org/apache/nutch/parse/ParseResult.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseResult.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseResult.java	(working copy)
@@ -27,94 +27,116 @@
 import org.apache.hadoop.io.Text;
 
 /**
- * A utility class that stores result of a parse. Internally
- * a ParseResult stores &lt;{@link Text}, {@link Parse}&gt; pairs.
- * <p>Parsers may return multiple results, which correspond to parts
- * or other associated documents related to the original URL.</p>
- * <p>There will be usually one parse result that corresponds directly
- * to the original URL, and possibly many (or none) results that correspond
- * to derived URLs (or sub-URLs).
+ * A utility class that stores result of a parse. Internally a ParseResult
+ * stores &lt;{@link Text}, {@link Parse}&gt; pairs.
+ * <p>
+ * Parsers may return multiple results, which correspond to parts or other
+ * associated documents related to the original URL.
+ * </p>
+ * <p>
+ * There will be usually one parse result that corresponds directly to the
+ * original URL, and possibly many (or none) results that correspond to derived
+ * URLs (or sub-URLs).
  */
 public class ParseResult implements Iterable<Map.Entry<Text, Parse>> {
   private Map<Text, Parse> parseMap;
   private String originalUrl;
-  
+
   public static final Logger LOG = LoggerFactory.getLogger(ParseResult.class);
-  
+
   /**
    * Create a container for parse results.
-   * @param originalUrl the original url from which all parse results
-   * have been obtained.
+   * 
+   * @param originalUrl
+   *          the original url from which all parse results have been obtained.
    */
   public ParseResult(String originalUrl) {
     parseMap = new HashMap<Text, Parse>();
     this.originalUrl = originalUrl;
   }
-  
+
   /**
    * Convenience method for obtaining {@link ParseResult} from a single
    * {@link Parse} output.
-   * @param url canonical url
-   * @param parse single parse output
+   * 
+   * @param url
+   *          canonical url
+   * @param parse
+   *          single parse output
    * @return result containing the single parse output
    */
   public static ParseResult createParseResult(String url, Parse parse) {
     ParseResult parseResult = new ParseResult(url);
-    parseResult.put(new Text(url), new ParseText(parse.getText()), parse.getData());
+    parseResult.put(new Text(url), new ParseText(parse.getText()),
+        parse.getData());
     return parseResult;
   }
-  
+
   /**
    * Checks whether the result is empty.
+   * 
    * @return
    */
   public boolean isEmpty() {
     return parseMap.isEmpty();
   }
-  
+
   /**
    * Return the number of parse outputs (both successful and failed)
    */
   public int size() {
     return parseMap.size();
   }
-  
+
   /**
    * Retrieve a single parse output.
-   * @param key sub-url under which the parse output is stored.
+   * 
+   * @param key
+   *          sub-url under which the parse output is stored.
    * @return parse output corresponding to this sub-url, or null.
    */
   public Parse get(String key) {
     return get(new Text(key));
   }
-  
+
   /**
    * Retrieve a single parse output.
-   * @param key sub-url under which the parse output is stored.
+   * 
+   * @param key
+   *          sub-url under which the parse output is stored.
    * @return parse output corresponding to this sub-url, or null.
    */
   public Parse get(Text key) {
     return parseMap.get(key);
   }
-  
+
   /**
    * Store a result of parsing.
-   * @param key URL or sub-url of this parse result
-   * @param text plain text result
-   * @param data corresponding parse metadata of this result
+   * 
+   * @param key
+   *          URL or sub-url of this parse result
+   * @param text
+   *          plain text result
+   * @param data
+   *          corresponding parse metadata of this result
    */
   public void put(Text key, ParseText text, ParseData data) {
     put(key.toString(), text, data);
   }
-  
+
   /**
    * Store a result of parsing.
-   * @param key URL or sub-url of this parse result
-   * @param text plain text result
-   * @param data corresponding parse metadata of this result
+   * 
+   * @param key
+   *          URL or sub-url of this parse result
+   * @param text
+   *          plain text result
+   * @param data
+   *          corresponding parse metadata of this result
    */
   public void put(String key, ParseText text, ParseData data) {
-    parseMap.put(new Text(key), new ParseImpl(text, data, key.equals(originalUrl)));
+    parseMap.put(new Text(key),
+        new ParseImpl(text, data, key.equals(originalUrl)));
   }
 
   /**
@@ -123,21 +145,21 @@
   public Iterator<Entry<Text, Parse>> iterator() {
     return parseMap.entrySet().iterator();
   }
-  
+
   /**
-   * Remove all results where status is not successful (as determined
-   * by {@link ParseStatus#isSuccess()}). Note that effects of this operation
+   * Remove all results where status is not successful (as determined by
+   * {@link ParseStatus#isSuccess()}). Note that effects of this operation
    * cannot be reversed.
    */
   public void filter() {
-    for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
+    for (Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
       Entry<Text, Parse> entry = i.next();
       if (!entry.getValue().getData().getStatus().isSuccess()) {
         LOG.warn(entry.getKey() + " is not parsed successfully, filtering");
         i.remove();
       }
     }
-      
+
   }
 
   /**
@@ -145,7 +167,7 @@
    * Parse success is determined by {@link ParseStatus#isSuccess()}
    */
   public boolean isSuccess() {
-    for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
+    for (Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
       Entry<Text, Parse> entry = i.next();
       if (!entry.getValue().getData().getStatus().isSuccess()) {
         return false;
Index: src/java/org/apache/nutch/parse/Parse.java
===================================================================
--- src/java/org/apache/nutch/parse/Parse.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/Parse.java	(working copy)
@@ -17,18 +17,22 @@
 
 package org.apache.nutch.parse;
 
-/** The result of parsing a page's raw content.
+/**
+ * The result of parsing a page's raw content.
+ * 
  * @see Parser#getParse(Content)
  */
 public interface Parse {
-  
-  /** The textual content of the page. This is indexed, searched, and used when
-   * generating snippets.*/ 
+
+  /**
+   * The textual content of the page. This is indexed, searched, and used when
+   * generating snippets.
+   */
   String getText();
 
   /** Other data extracted from the page. */
   ParseData getData();
-  
+
   /** Indicates if the parse is coming from a url or a sub-url */
   boolean isCanonical();
 }
Index: src/java/org/apache/nutch/parse/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatus.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseStatus.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 /*
  * Created on Apr 28, 2005
  * Author: Andrzej Bialecki &lt;ab@getopt.org&gt;
@@ -32,113 +32,121 @@
 
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ParseStatus implements Writable {
-  
+
   private final static byte VERSION = 2;
-  
+
   // Primary status codes:
-  
+
   /** Parsing was not performed. */
-  public static final byte NOTPARSED       = 0;
+  public static final byte NOTPARSED = 0;
   /** Parsing succeeded. */
-  public static final byte SUCCESS         = 1;
-  /** General failure. There may be a more specific error message in arguments. */
-  public static final byte FAILED          = 2;
-  
-  public static final String[] majorCodes = {
-          "notparsed",
-          "success",
-          "failed"
-  };
-  
+  public static final byte SUCCESS = 1;
+  /**
+   * General failure. There may be a more specific error message in arguments.
+   */
+  public static final byte FAILED = 2;
+
+  public static final String[] majorCodes = { "notparsed", "success", "failed" };
+
   // Secondary success codes go here:
-  
-  /** Parsed content contains a directive to redirect to another URL.
-   * The target URL can be retrieved from the arguments.
+
+  /**
+   * Parsed content contains a directive to redirect to another URL. The target
+   * URL can be retrieved from the arguments.
    */
-  public static final short SUCCESS_REDIRECT          = 100;
-  
+  public static final short SUCCESS_REDIRECT = 100;
+
   // Secondary failure codes go here:
-  
-  /** Parsing failed. An Exception occured (which may be retrieved from the arguments). */
-  public static final short FAILED_EXCEPTION          = 200;
-  /** Parsing failed. Content was truncated, but the parser cannot handle incomplete content. */
-  public static final short FAILED_TRUNCATED          = 202;
-  /** Parsing failed. Invalid format - the content may be corrupted or of wrong type. */
-  public static final short FAILED_INVALID_FORMAT     = 203;
-  /** Parsing failed. Other related parts of the content are needed to complete
+
+  /**
+   * Parsing failed. An Exception occurred (which may be retrieved from the
+   * arguments).
+   */
+  public static final short FAILED_EXCEPTION = 200;
+  /**
+   * Parsing failed. Content was truncated, but the parser cannot handle
+   * incomplete content.
+   */
+  public static final short FAILED_TRUNCATED = 202;
+  /**
+   * Parsing failed. Invalid format - the content may be corrupted or of wrong
+   * type.
+   */
+  public static final short FAILED_INVALID_FORMAT = 203;
+  /**
+   * Parsing failed. Other related parts of the content are needed to complete
    * parsing. The list of URLs to missing parts may be provided in arguments.
    * The Fetcher may decide to fetch these parts at once, then put them into
    * Content.metadata, and supply them for re-parsing.
    */
-  public static final short FAILED_MISSING_PARTS      = 204;
-  /** Parsing failed. There was no content to be parsed - probably caused
-   * by errors at protocol stage.
+  public static final short FAILED_MISSING_PARTS = 204;
+  /**
+   * Parsing failed. There was no content to be parsed - probably caused by
+   * errors at protocol stage.
    */
-  public static final short FAILED_MISSING_CONTENT    = 205;
+  public static final short FAILED_MISSING_CONTENT = 205;
 
-
   public static final ParseStatus STATUS_NOTPARSED = new ParseStatus(NOTPARSED);
   public static final ParseStatus STATUS_SUCCESS = new ParseStatus(SUCCESS);
   public static final ParseStatus STATUS_FAILURE = new ParseStatus(FAILED);
-  
+
   private byte majorCode = 0;
   private short minorCode = 0;
   private String[] args = null;
-  
+
   public byte getVersion() {
     return VERSION;
   }
 
   public ParseStatus() {
-    
+
   }
-  
+
   public ParseStatus(int majorCode, int minorCode, String[] args) {
     this.args = args;
-    this.majorCode = (byte)majorCode;
-    this.minorCode = (short)minorCode;
+    this.majorCode = (byte) majorCode;
+    this.minorCode = (short) minorCode;
   }
-  
+
   public ParseStatus(int majorCode) {
-    this(majorCode, 0, (String[])null);
+    this(majorCode, 0, (String[]) null);
   }
-  
+
   public ParseStatus(int majorCode, String[] args) {
     this(majorCode, 0, args);
   }
-  
+
   public ParseStatus(int majorCode, int minorCode) {
-    this(majorCode, minorCode, (String[])null);
+    this(majorCode, minorCode, (String[]) null);
   }
-  
+
   /** Simplified constructor for passing just a text message. */
   public ParseStatus(int majorCode, int minorCode, String message) {
-    this(majorCode, minorCode, new String[]{message});
+    this(majorCode, minorCode, new String[] { message });
   }
-  
+
   /** Simplified constructor for passing just a text message. */
   public ParseStatus(int majorCode, String message) {
-    this(majorCode, 0, new String[]{message});
+    this(majorCode, 0, new String[] { message });
   }
-  
+
   public ParseStatus(Throwable t) {
-    this(FAILED, FAILED_EXCEPTION, new String[]{t.toString()});
+    this(FAILED, FAILED_EXCEPTION, new String[] { t.toString() });
   }
-  
+
   public static ParseStatus read(DataInput in) throws IOException {
     ParseStatus res = new ParseStatus();
     res.readFields(in);
     return res;
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     byte version = in.readByte();
-    switch(version) {
+    switch (version) {
     case 1:
       majorCode = in.readByte();
       minorCode = in.readShort();
@@ -152,8 +160,8 @@
     default:
       throw new VersionMismatchException(VERSION, version);
     }
- }
-  
+  }
+
   public void write(DataOutput out) throws IOException {
     out.writeByte(VERSION);
     out.writeByte(majorCode);
@@ -164,55 +172,61 @@
       WritableUtils.writeStringArray(out, args);
     }
   }
-  
-  /** A convenience method. Returns true if majorCode is SUCCESS, false
+
+  /**
+   * A convenience method. Returns true if majorCode is SUCCESS, false
    * otherwise.
    */
-  
+
   public boolean isSuccess() {
     return majorCode == SUCCESS;
   }
-  
-  /** A convenience method. Return a String representation of the first
-   * argument, or null.
+
+  /**
+   * A convenience method. Return a String representation of the first argument,
+   * or null.
    */
   public String getMessage() {
     if (args != null && args.length > 0 && args[0] != null)
       return args[0];
     return null;
   }
-  
+
   public String[] getArgs() {
     return args;
   }
-  
+
   public int getMajorCode() {
     return majorCode;
   }
-  
+
   public int getMinorCode() {
     return minorCode;
   }
-  
-  /** A convenience method. Creates an empty Parse instance,
-   * which returns this status.
+
+  /**
+   * A convenience method. Creates an empty Parse instance, which returns this
+   * status.
    */
   public Parse getEmptyParse(Configuration conf) {
     return new EmptyParseImpl(this, conf);
   }
-  
-  /** A convenience method. Creates an empty ParseResult,
-   * which contains this status.
+
+  /**
+   * A convenience method. Creates an empty ParseResult, which contains this
+   * status.
    */
   public ParseResult getEmptyParseResult(String url, Configuration conf) {
     return ParseResult.createParseResult(url, getEmptyParse(conf));
   }
-  
+
   public String toString() {
     StringBuffer res = new StringBuffer();
     String name = null;
-    if (majorCode >= 0 && majorCode < majorCodes.length) name = majorCodes[majorCode];
-    else name = "UNKNOWN!";
+    if (majorCode >= 0 && majorCode < majorCodes.length)
+      name = majorCodes[majorCode];
+    else
+      name = "UNKNOWN!";
     res.append(name + "(" + majorCode + "," + minorCode + ")");
     if (args != null) {
       if (args.length == 1) {
@@ -226,18 +240,18 @@
     }
     return res.toString();
   }
-  
+
   public void setArgs(String[] args) {
     this.args = args;
   }
-  
+
   public void setMessage(String msg) {
     if (args == null || args.length == 0) {
       args = new String[1];
     }
     args[0] = msg;
   }
-  
+
   public void setMajorCode(byte majorCode) {
     this.majorCode = majorCode;
   }
@@ -245,37 +259,45 @@
   public void setMinorCode(short minorCode) {
     this.minorCode = minorCode;
   }
-  
+
   public boolean equals(Object o) {
-    if (o == null) return false;
-    if (!(o instanceof ParseStatus)) return false;
+    if (o == null)
+      return false;
+    if (!(o instanceof ParseStatus))
+      return false;
     boolean res = true;
-    ParseStatus other = (ParseStatus)o;
-    res = res && (this.majorCode == other.majorCode) &&
-      (this.minorCode == other.minorCode);
-    if (!res) return res;
+    ParseStatus other = (ParseStatus) o;
+    res = res && (this.majorCode == other.majorCode)
+        && (this.minorCode == other.minorCode);
+    if (!res)
+      return res;
     if (this.args == null) {
-      if (other.args == null) return true;
-      else return false;
+      if (other.args == null)
+        return true;
+      else
+        return false;
     } else {
-      if (other.args == null) return false;
-      if (other.args.length != this.args.length) return false;
+      if (other.args == null)
+        return false;
+      if (other.args.length != this.args.length)
+        return false;
       for (int i = 0; i < this.args.length; i++) {
-        if (!this.args[i].equals(other.args[i])) return false;
+        if (!this.args[i].equals(other.args[i]))
+          return false;
       }
     }
     return true;
   }
-  
+
   private static class EmptyParseImpl implements Parse {
-    
+
     private ParseData data = null;
-    
+
     public EmptyParseImpl(ParseStatus status, Configuration conf) {
-      data = new ParseData(status, "", new Outlink[0],
-                           new Metadata(), new Metadata());
+      data = new ParseData(status, "", new Outlink[0], new Metadata(),
+          new Metadata());
     }
-    
+
     public ParseData getData() {
       return data;
     }
@@ -283,10 +305,9 @@
     public String getText() {
       return "";
     }
-    
+
     public boolean isCanonical() {
       return true;
     }
   }
 }
-
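
For reference while reviewing the ParseStatus changes above, a minimal usage sketch; it is illustrative only (the exception message and redirect URL are invented) and assumes the public SUCCESS and SUCCESS_REDIRECT constants declared in the class:

    import org.apache.nutch.parse.ParseStatus;

    public class ParseStatusExample {
      public static void main(String[] args) {
        // Failure built from an exception: majorCode FAILED, minorCode FAILED_EXCEPTION.
        ParseStatus failed = new ParseStatus(new RuntimeException("boom"));
        System.out.println(failed.isSuccess());  // false
        System.out.println(failed.getMessage()); // "java.lang.RuntimeException: boom"

        // A redirect discovered while parsing; the target URL travels in the args.
        ParseStatus redirect = new ParseStatus(ParseStatus.SUCCESS,
            ParseStatus.SUCCESS_REDIRECT,
            new String[] { "http://www.example.com/target" });
        System.out.println(redirect.toString());
      }
    }
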
Index: src/java/org/apache/nutch/parse/ParseText.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseText.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseText.java	(working copy)
@@ -33,10 +33,12 @@
 
   private final static byte VERSION = 2;
 
-  public ParseText() {}
+  public ParseText() {
+  }
+
   private String text;
-    
-  public ParseText(String text){
+
+  public ParseText(String text) {
     this.text = text;
   }
 
@@ -68,12 +70,14 @@
   //
   // Accessor methods
   //
-  public String getText()  { return text; }
+  public String getText() {
+    return text;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof ParseText))
       return false;
-    ParseText other = (ParseText)o;
+    ParseText other = (ParseText) o;
     return this.text.equals(other.text);
   }
 
@@ -90,12 +94,11 @@
     }
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
-    
+
     FileSystem fs = FileSystem.get(conf);
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
Index: src/java/org/apache/nutch/parse/OutlinkExtractor.java
===================================================================
--- src/java/org/apache/nutch/parse/OutlinkExtractor.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/OutlinkExtractor.java	(working copy)
@@ -34,8 +34,8 @@
 import org.apache.oro.text.regex.Perl5Matcher;
 
 /**
- * Extractor to extract {@link org.apache.nutch.parse.Outlink}s 
- * / URLs from plain text using Regular Expressions.
+ * Extractor to extract {@link org.apache.nutch.parse.Outlink}s / URLs from
+ * plain text using Regular Expressions.
  * 
  * @see <a
  *      href="http://wiki.java.net/bin/view/Javapedia/RegularExpressions">Comparison
@@ -48,24 +48,27 @@
  * @since 0.7
  */
 public class OutlinkExtractor {
-  private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(OutlinkExtractor.class);
 
   /**
    * Regex pattern to get URLs within a plain text.
    * 
    * @see <a
    *      href="http://www.truerwords.net/articles/ut/urlactivation.html">http://www.truerwords.net/articles/ut/urlactivation.html
+
    *      </a>
    */
-  private static final String URL_PATTERN = 
-    "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
+  private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
 
   /**
-   * Extracts <code>Outlink</code> from given plain text.
-   * Applying this method to non-plain-text can result in extremely lengthy
-   * runtimes for parasitic cases (postscript is a known example).
-   * @param plainText  the plain text from wich URLs should be extracted.
+   * Extracts <code>Outlink</code> from given plain text. Applying this method
+   * to non-plain-text can result in extremely lengthy runtimes for parasitic
+   * cases (postscript is a known example).
    * 
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
   public static Outlink[] getOutlinks(final String plainText, Configuration conf) {
@@ -73,15 +76,18 @@
   }
 
   /**
-   * Extracts <code>Outlink</code> from given plain text and adds anchor
-   * to the extracted <code>Outlink</code>s
+   * Extracts <code>Outlink</code> from given plain text and adds anchor to the
+   * extracted <code>Outlink</code>s
    * 
-   * @param plainText the plain text from wich URLs should be extracted.
-   * @param anchor    the anchor of the url
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * @param anchor
+   *          the anchor of the url
    * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
-  public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
+  public static Outlink[] getOutlinks(final String plainText, String anchor,
+      Configuration conf) {
     long start = System.currentTimeMillis();
     final List<Outlink> outlinks = new ArrayList<Outlink>();
 
@@ -97,11 +103,11 @@
       MatchResult result;
       String url;
 
-      //loop the matches
+      // loop the matches
       while (matcher.contains(input, pattern)) {
         // if this is taking too long, stop matching
-        //   (SHOULD really check cpu time used so that heavily loaded systems
-        //   do not unnecessarily hit this limit.)
+        // (SHOULD really check cpu time used so that heavily loaded systems
+        // do not unnecessarily hit this limit.)
         if (System.currentTimeMillis() - start >= 60000L) {
           if (LOG.isWarnEnabled()) {
             LOG.warn("Time limit exceeded for getOutLinks");
@@ -117,13 +123,16 @@
         }
       }
     } catch (Exception ex) {
-      // if the matcher fails (perhaps a malformed URL) we just log it and move on
-      if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
+      // if the matcher fails (perhaps a malformed URL) we just log it and move
+      // on
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getOutlinks", ex);
+      }
     }
 
     final Outlink[] retval;
 
-    //create array of the Outlinks
+    // create array of the Outlinks
     if (outlinks != null && outlinks.size() > 0) {
       retval = outlinks.toArray(new Outlink[0]);
     } else {
@@ -132,7 +141,6 @@
 
     return retval;
   }
-  
 
   /**
    * Extracts outlinks from a plain text. <br />
@@ -161,7 +169,7 @@
     // url = re.getParen(0);
     //
     // if (LOG.isTraceEnabled()) {
-    //   LOG.trace("Extracted url: " + url);
+    // LOG.trace("Extracted url: " + url);
     // }
     //
     // try {
@@ -191,9 +199,8 @@
   }
 
   /**
-   * Extracts outlinks from a plain text.
-   * </p>
-   * This Method takes the JDK5 Regexp API.
+   * Extracts outlinks from a plain text. This method uses the JDK5 Regexp
+   * API.
    * 
    * @param plainText
    * 
@@ -241,5 +248,5 @@
     //
     // return retval;
   }
- 
+
 }
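
A small sketch of calling the extractor whose Javadoc is reformatted above; the sample text and anchor string are invented:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.parse.Outlink;
    import org.apache.nutch.parse.OutlinkExtractor;
    import org.apache.nutch.util.NutchConfiguration;

    public class OutlinkExtractorExample {
      public static void main(String[] args) {
        Configuration conf = NutchConfiguration.create();
        String text = "See http://nutch.apache.org/ and http://example.com/docs for details.";
        // Extraction is regex-based and bails out after roughly 60 seconds on
        // pathological (non-plain-text) input.
        Outlink[] links = OutlinkExtractor.getOutlinks(text, "example anchor", conf);
        for (Outlink link : links) {
          System.out.println(link.getToUrl() + " [" + link.getAnchor() + "]");
        }
      }
    }
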
Index: src/java/org/apache/nutch/parse/ParserFactory.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParserFactory.java	(working copy)
@@ -39,18 +39,17 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.ObjectCache;
 
+/** Creates and caches {@link Parser} plugins. */
+public final class ParserFactory {
 
-/** Creates and caches {@link Parser} plugins.*/
-public final class ParserFactory {
-  
   public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);
-  
+
   /** Wildcard for default plugins. */
   public static final String DEFAULT_PLUGIN = "*";
-  
+
   /** Empty extension list for caching purposes. */
   private final List EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
-  
+
   private Configuration conf;
   private ExtensionPoint extensionPoint;
   private ParsePluginList parsePluginList;
@@ -60,10 +59,12 @@
     ObjectCache objectCache = ObjectCache.get(conf);
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
         Parser.X_POINT_ID);
-    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
+    this.parsePluginList = (ParsePluginList) objectCache
+        .getObject(ParsePluginList.class.getName());
     if (this.parsePluginList == null) {
       this.parsePluginList = new ParsePluginsReader().parse(conf);
-      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
+      objectCache.setObject(ParsePluginList.class.getName(),
+          this.parsePluginList);
     }
 
     if (this.extensionPoint == null) {
@@ -73,45 +74,46 @@
       throw new RuntimeException(
           "Parse Plugins preferences could not be loaded.");
     }
-  }                      
-  
-   
+  }
+
   /**
    * Function returns an array of {@link Parser}s for a given content type.
-   *
+   * 
    * The function consults the internal list of parse plugins for the
-   * ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {@link Parser}s.
-   *
-   * @param contentType The contentType to return the <code>Array</code>
-   *                    of {@link Parser}s for.
-   * @param url The url for the content that may allow us to get the type from
-   *            the file suffix.
+   * ParserFactory to determine the list of pluginIds, then gets the appropriate
+   * extension points to instantiate as {@link Parser}s.
+   * 
+   * @param contentType
+   *          The contentType to return the <code>Array</code> of
+   *          {@link Parser}s for.
+   * @param url
+   *          The url for the content that may allow us to get the type from the
+   *          file suffix.
    * @return An <code>Array</code> of {@link Parser}s for the given contentType.
    *         If there were plugins mapped to a contentType via the
-   *         <code>parse-plugins.xml</code> file, but never enabled via
-   *         the <code>plugin.includes</code> Nutch conf, then those plugins
-   *         won't be part of this array, i.e., they will be skipped.
-   *         So, if the ordered list of parsing plugins for
-   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         <code>parse-plugins.xml</code> file, but never enabled via the
+   *         <code>plugin.includes</code> Nutch conf, then those plugins won't
+   *         be part of this array, i.e., they will be skipped. So, if the
+   *         ordered list of parsing plugins for <code>text/plain</code> was
+   *         <code>[parse-text,parse-html,
    *         parse-rtf]</code>, and only <code>parse-html</code> and
    *         <code>parse-rtf</code> were enabled via
-   *         <code>plugin.includes</code>, then this ordered Array would
-   *         consist of two {@link Parser} interfaces,
+   *         <code>plugin.includes</code>, then this ordered Array would consist
+   *         of two {@link Parser} interfaces,
    *         <code>[parse-html, parse-rtf]</code>.
    */
   public Parser[] getParsers(String contentType, String url)
-  throws ParserNotFound {
-    
+      throws ParserNotFound {
+
     List<Parser> parsers = null;
     List<Extension> parserExts = null;
-    
+
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     // TODO once the MimeTypes is available
     // parsers = getExtensions(MimeUtils.map(contentType));
     // if (parsers != null) {
-    //   return parsers;
+    // return parsers;
     // }
     // Last Chance: Guess content-type from file url...
     // parsers = getExtensions(MimeUtils.getMimeType(url));
@@ -122,51 +124,52 @@
     }
 
     parsers = new Vector<Parser>(parserExts.size());
-    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
+    for (Iterator i = parserExts.iterator(); i.hasNext();) {
       Extension ext = (Extension) i.next();
       Parser p = null;
       try {
-        //check to see if we've cached this parser instance yet
+        // check to see if we've cached this parser instance yet
         p = (Parser) objectCache.getObject(ext.getId());
         if (p == null) {
           // go ahead and instantiate it and then cache it
           p = (Parser) ext.getExtensionInstance();
-          objectCache.setObject(ext.getId(),p);
+          objectCache.setObject(ext.getId(), p);
         }
         parsers.add(p);
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
           LOG.warn("ParserFactory:PluginRuntimeException when "
-                 + "initializing parser plugin "
-                 + ext.getDescriptor().getPluginId()
-                 + " instance in getParsers "
-                 + "function: attempting to continue instantiating parsers");
+              + "initializing parser plugin "
+              + ext.getDescriptor().getPluginId() + " instance in getParsers "
+              + "function: attempting to continue instantiating parsers");
         }
       }
     }
-    return parsers.toArray(new Parser[]{});
+    return parsers.toArray(new Parser[] {});
   }
-    
+
   /**
    * Function returns a {@link Parser} instance with the specified
-   * <code>extId</code>, representing its extension ID. If the Parser
-   * instance isn't found, then the function throws a
-   * <code>ParserNotFound</code> exception. If the function is able to find
-   * the {@link Parser} in the internal <code>PARSER_CACHE</code> then it
-   * will return the already instantiated Parser. Otherwise, if it has to
-   * instantiate the Parser itself , then this function will cache that Parser
-   * in the internal <code>PARSER_CACHE</code>.
+   * <code>extId</code>, representing its extension ID. If the Parser instance
+   * isn't found, then the function throws a <code>ParserNotFound</code>
+   * exception. If the function is able to find the {@link Parser} in the
+   * internal <code>PARSER_CACHE</code> then it will return the already
+   * instantiated Parser. Otherwise, if it has to instantiate the Parser itself,
+   * then this function will cache that Parser in the internal
+   * <code>PARSER_CACHE</code>.
    * 
-   * @param id The string extension ID (e.g.,
-   *        "org.apache.nutch.parse.rss.RSSParser",
-   *        "org.apache.nutch.parse.rtf.RTFParseFactory") of the {@link Parser}
-   *        implementation to return.
+   * @param id
+   *          The string extension ID (e.g.,
+   *          "org.apache.nutch.parse.rss.RSSParser",
+   *          "org.apache.nutch.parse.rtf.RTFParseFactory") of the
+   *          {@link Parser} implementation to return.
    * @return A {@link Parser} implementation specified by the parameter
    *         <code>id</code>.
-   * @throws ParserNotFound If the Parser is not found (i.e., registered with
-   *         the extension point), or if the there a
-   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
+   * @throws ParserNotFound
+   *           If the Parser is not found (i.e., not registered with the
+   *           extension point), or if there is a {@link PluginRuntimeException}
+   *           instantiating the {@link Parser}.
    */
   public Parser getParserById(String id) throws ParserNotFound {
 
@@ -174,7 +177,7 @@
     Extension parserExt = null;
 
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (id != null) {
       parserExt = getExtension(extensions, id);
     }
@@ -185,12 +188,12 @@
     if (parserExt == null) {
       throw new ParserNotFound("No Parser Found for id [" + id + "]");
     }
-    
-    // first check the cache	    	   
+
+    // first check the cache
     if (objectCache.getObject(parserExt.getId()) != null) {
       return (Parser) objectCache.getObject(parserExt.getId());
 
-    // if not found in cache, instantiate the Parser    
+      // if not found in cache, instantiate the Parser
     } else {
       try {
         Parser p = (Parser) parserExt.getExtensionInstance();
@@ -198,30 +201,30 @@
         return p;
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Canno initialize parser " +
-                   parserExt.getDescriptor().getPluginId() +
-                   " (cause: " + e.toString());
+          LOG.warn("Cannot initialize parser "
+              + parserExt.getDescriptor().getPluginId() + " (cause: "
+              + e.toString());
         }
         throw new ParserNotFound("Cannot init parser for id [" + id + "]");
       }
     }
   }
-  
+
   /**
    * Finds the best-suited parse plugin for a given contentType.
    * 
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return a list of extensions to be used for this contentType.
-   *         If none, returns <code>null</code>.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return a list of extensions to be used for this contentType. If none,
+   *         returns <code>null</code>.
    */
   protected List<Extension> getExtensions(String contentType) {
-    
+
     ObjectCache objectCache = ObjectCache.get(conf);
     // First of all, tries to clean the content-type
     String type = null;
     type = MimeUtil.cleanMimeType(contentType);
 
-
     List<Extension> extensions = (List<Extension>) objectCache.getObject(type);
 
     // Just compare the reference:
@@ -229,100 +232,105 @@
     if (extensions == EMPTY_EXTENSION_LIST) {
       return null;
     }
-    
+
     if (extensions == null) {
       extensions = findExtensions(type);
       if (extensions != null) {
         objectCache.setObject(type, extensions);
       } else {
-      	// Put the empty extension list into cache
-      	// to remember we don't know any related extension.
+        // Put the empty extension list into cache
+        // to remember we don't know any related extension.
         objectCache.setObject(type, EMPTY_EXTENSION_LIST);
       }
     }
     return extensions;
   }
-  
+
   /**
    * searches a list of suitable parse plugins for the given contentType.
-   * <p>It first looks for a preferred plugin defined in the parse-plugin
-   * file.  If none is found, it returns a list of default plugins.
+   * <p>
+   * It first looks for a preferred plugin defined in the parse-plugin file. If
+   * none is found, it returns a list of default plugins.
    * 
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> findExtensions(String contentType) {
-    
+
     Extension[] extensions = this.extensionPoint.getExtensions();
-    
+
     // Look for a preferred plugin.
-    List<String> parsePluginList =
-      this.parsePluginList.getPluginList(contentType);
-    List<Extension> extensionList =
-      matchExtensions(parsePluginList, extensions, contentType);
+    List<String> parsePluginList = this.parsePluginList
+        .getPluginList(contentType);
+    List<Extension> extensionList = matchExtensions(parsePluginList,
+        extensions, contentType);
     if (extensionList != null) {
       return extensionList;
     }
-    
+
     // If none found, look for a default plugin.
     parsePluginList = this.parsePluginList.getPluginList(DEFAULT_PLUGIN);
     return matchExtensions(parsePluginList, extensions, DEFAULT_PLUGIN);
   }
-  
+
   /**
    * Tries to find a suitable parser for the given contentType.
    * <ol>
-   * <li>It checks if a parser which accepts the contentType
-   * can be found in the <code>plugins</code> list;</li>
-   * <li>If this list is empty, it tries to find amongst the loaded
-   * extensions whether some of them might suit and warns the user.</li>
+   * <li>It checks if a parser which accepts the contentType can be found in the
+   * <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded extensions
+   * whether some of them might suit and warns the user.</li>
    * </ol>
-   * @param plugins List of candidate plugins.
-   * @param extensions Array of loaded extensions.
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * 
+   * @param plugins
+   *          List of candidate plugins.
+   * @param extensions
+   *          Array of loaded extensions.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> matchExtensions(List<String> plugins,
-                               Extension[] extensions,
-                               String contentType) {
-    
+      Extension[] extensions, String contentType) {
+
     List<Extension> extList = new ArrayList<Extension>();
     if (plugins != null) {
-      
+
       for (String parsePluginId : plugins) {
-        
+
         Extension ext = getExtension(extensions, parsePluginId, contentType);
         // the extension returned may be null
         // that means that it was not enabled in the plugin.includes
         // nutch conf property, but it was mapped in the
         // parse-plugins.xml
-        // file. 
+        // file.
         // OR it was enabled in plugin.includes, but the plugin's plugin.xml
         // file does not claim that the plugin supports the specified mimeType
         // in either case, LOG the appropriate error message to WARN level
-        
+
         if (ext == null) {
-          //try to get it just by its pluginId
+          // try to get it just by its pluginId
           ext = getExtension(extensions, parsePluginId);
-          
-          if (LOG.isWarnEnabled()) { 
+
+          if (LOG.isWarnEnabled()) {
             if (ext != null) {
               // plugin was enabled via plugin.includes
               // its plugin.xml just doesn't claim to support that
               // particular mimeType
-              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but " + "its plugin.xml " +
-                       "file does not claim to support contentType: " +
-                       contentType);
+              LOG.warn("ParserFactory:Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but " + "its plugin.xml "
+                  + "file does not claim to support contentType: "
+                  + contentType);
             } else {
               // plugin wasn't enabled via plugin.includes
-              LOG.warn("ParserFactory: Plugin: " + parsePluginId + 
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but not enabled via " +
-                       "plugin.includes in nutch-default.xml");                     
+              LOG.warn("ParserFactory: Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but not enabled via "
+                  + "plugin.includes in nutch-default.xml");
             }
           }
         }
@@ -332,7 +340,7 @@
           extList.add(ext);
         }
       }
-      
+
     } else {
       // okay, there were no list of plugins defined for
       // this mimeType, however, there may be plugins registered
@@ -341,75 +349,78 @@
       // so, iterate through the list of extensions and if you find
       // any extensions where this is the case, throw a
       // NotMappedParserException
-      
-      for (int i=0; i<extensions.length; i++) {
-      	if ("*".equals(extensions[i].getAttribute("contentType"))){
+
+      for (int i = 0; i < extensions.length; i++) {
+        if ("*".equals(extensions[i].getAttribute("contentType"))) {
           extList.add(0, extensions[i]);
-        }
-        else if (extensions[i].getAttribute("contentType") != null
-            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
+        } else if (extensions[i].getAttribute("contentType") != null
+            && contentType.matches(escapeContentType(extensions[i]
+                .getAttribute("contentType")))) {
           extList.add(extensions[i]);
         }
       }
-      
+
       if (extList.size() > 0) {
         if (LOG.isInfoEnabled()) {
           StringBuffer extensionsIDs = new StringBuffer("[");
           boolean isFirst = true;
-          for (Extension ext : extList){
-        	  if (!isFirst) extensionsIDs.append(" - ");
-        	  else isFirst=false;
-        	  extensionsIDs.append(ext.getId());
+          for (Extension ext : extList) {
+            if (!isFirst)
+              extensionsIDs.append(" - ");
+            else
+              isFirst = false;
+            extensionsIDs.append(ext.getId());
           }
-    	  extensionsIDs.append("]");
-          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
-                   " are enabled via the plugin.includes system " +
-                   "property, and all claim to support the content type " +
-                   contentType + ", but they are not mapped to it  in the " +
-                   "parse-plugins.xml file");
+          extensionsIDs.append("]");
+          LOG.info("The parsing plugins: " + extensionsIDs.toString()
+              + " are enabled via the plugin.includes system "
+              + "property, and all claim to support the content type "
+              + contentType + ", but they are not mapped to it in the "
+              + "parse-plugins.xml file");
         }
       } else if (LOG.isDebugEnabled()) {
-        LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
-                  "contentType " + contentType);
+        LOG.debug("ParserFactory:No parse plugins mapped or enabled for "
+            + "contentType " + contentType);
       }
     }
-    
+
     return (extList.size() > 0) ? extList : null;
   }
-  
+
   private String escapeContentType(String contentType) {
-  	// Escapes contentType in order to use as a regex 
-  	// (and keep backwards compatibility).
-  	// This enables to accept multiple types for a single parser. 
-  	return contentType.replace("+", "\\+").replace(".", "\\.");
-	}
+    // Escapes contentType in order to use as a regex
+    // (and keep backwards compatibility).
+    // This enables to accept multiple types for a single parser.
+    return contentType.replace("+", "\\+").replace(".", "\\.");
+  }
 
   private boolean match(Extension extension, String id, String type) {
-    return ((id.equals(extension.getId())) &&
-            (extension.getAttribute("contentType").equals("*") || 
-             type.matches(escapeContentType(extension.getAttribute("contentType"))) ||
-             type.equals(DEFAULT_PLUGIN)));
+    return ((id.equals(extension.getId())) && (extension.getAttribute(
+        "contentType").equals("*")
+        || type
+            .matches(escapeContentType(extension.getAttribute("contentType"))) || type
+          .equals(DEFAULT_PLUGIN)));
   }
-  
+
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (match(list[i], id, type)) {
         return list[i];
       }
     }
     return null;
   }
-    
+
   private Extension getExtension(Extension[] list, String id) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (id.equals(list[i].getId())) {
         return list[i];
       }
     }
     return null;
   }
-  
+
   private Extension getExtensionFromAlias(Extension[] list, String id) {
     return getExtension(list, parsePluginList.getAliases().get(id));
   }
Index: src/java/org/apache/nutch/parse/Outlink.java
===================================================================
--- src/java/org/apache/nutch/parse/Outlink.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/Outlink.java	(working copy)
@@ -28,11 +28,13 @@
   private String toUrl;
   private String anchor;
 
-  public Outlink() {}
+  public Outlink() {
+  }
 
   public Outlink(String toUrl, String anchor) throws MalformedURLException {
     this.toUrl = toUrl;
-    if (anchor == null) anchor = "";
+    if (anchor == null)
+      anchor = "";
     this.anchor = anchor;
   }
 
@@ -43,8 +45,8 @@
 
   /** Skips over one Outlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip toUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip toUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -58,21 +60,25 @@
     return outlink;
   }
 
-  public String getToUrl() { return toUrl; }
-  public String getAnchor() { return anchor; }
+  public String getToUrl() {
+    return toUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof Outlink))
       return false;
-    Outlink other = (Outlink)o;
-    return
-      this.toUrl.equals(other.toUrl) &&
-      this.anchor.equals(other.anchor);
+    Outlink other = (Outlink) o;
+    return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor);
   }
 
   public String toString() {
-    return "toUrl: " + toUrl + " anchor: " + anchor;  // removed "\n". toString, not printLine... WD.
+    return "toUrl: " + toUrl + " anchor: " + anchor; // removed "\n".
+    // toString, not
+    // printLine... WD.
   }
 
 }
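
A brief sketch of the Outlink Writable round-trip, using the read/write methods shown in context above; the URL and anchor are invented:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.nutch.parse.Outlink;

    public class OutlinkExample {
      public static void main(String[] args) throws IOException {
        Outlink out = new Outlink("http://example.com/page", "Example page");

        // Round-trip through the Writable serialization used in segment data.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        out.write(new DataOutputStream(bytes));
        Outlink back = Outlink.read(
            new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(back); // toUrl: http://example.com/page anchor: Example page
      }
    }
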
Index: src/java/org/apache/nutch/parse/ParsePluginList.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginList.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParsePluginList.java	(working copy)
@@ -22,25 +22,23 @@
 import java.util.List;
 import java.util.Map;
 
-
 /**
  * This class represents a natural ordering for which parsing plugin should get
  * called for a particular mimeType. It provides methods to store the
  * parse-plugins.xml data, and methods to retreive the name of the appropriate
  * parsing plugin for a contentType.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 class ParsePluginList {
-  
+
   /* a map to link mimeType to an ordered list of parsing plugins */
   private Map<String, List<String>> fMimeTypeToPluginMap = null;
-  
+
   /* A list of aliases */
   private Map<String, String> aliases = null;
-  
-  
+
   /**
    * Constructs a new ParsePluginList
    */
@@ -48,7 +46,7 @@
     fMimeTypeToPluginMap = new HashMap<String, List<String>>();
     aliases = new HashMap<String, String>();
   }
-  
+
   List<String> getPluginList(String mimeType) {
     return fMimeTypeToPluginMap.get(mimeType);
   }
@@ -56,18 +54,18 @@
   void setAliases(Map<String, String> aliases) {
     this.aliases = aliases;
   }
-  
+
   Map<String, String> getAliases() {
     return aliases;
   }
-  
+
   void setPluginList(String mimeType, List<String> l) {
     fMimeTypeToPluginMap.put(mimeType, l);
   }
-  
+
   List<String> getSupportedMimeTypes() {
-    return Arrays.asList(fMimeTypeToPluginMap.keySet().toArray(
-            new String[] {}));
+    return Arrays
+        .asList(fMimeTypeToPluginMap.keySet().toArray(new String[] {}));
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/HtmlParseFilters.java
===================================================================
--- src/java/org/apache/nutch/parse/HtmlParseFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/HtmlParseFilters.java	(working copy)
@@ -27,76 +27,81 @@
 
 import org.w3c.dom.DocumentFragment;
 
-/** Creates and caches {@link HtmlParseFilter} implementing plugins.*/
+/** Creates and caches {@link HtmlParseFilter} implementing plugins. */
 public class HtmlParseFilters {
 
   private HtmlParseFilter[] htmlParseFilters;
-  
+
   public static final String HTMLPARSEFILTER_ORDER = "htmlparsefilter.order";
 
   public HtmlParseFilters(Configuration conf) {
-        String order = conf.get(HTMLPARSEFILTER_ORDER);
-        ObjectCache objectCache = ObjectCache.get(conf);
-        this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
-        if (htmlParseFilters == null) {
-          /*
-           * If ordered filters are required, prepare array of filters based on
-           * property
-           */
-          String[] orderedFilters = null;
-          if (order != null && !order.trim().equals("")) {
-            orderedFilters = order.split("\\s+");
+    String order = conf.get(HTMLPARSEFILTER_ORDER);
+    ObjectCache objectCache = ObjectCache.get(conf);
+    this.htmlParseFilters = (HtmlParseFilter[]) objectCache
+        .getObject(HtmlParseFilter.class.getName());
+    if (htmlParseFilters == null) {
+      /*
+       * If ordered filters are required, prepare array of filters based on
+       * property
+       */
+      String[] orderedFilters = null;
+      if (order != null && !order.trim().equals("")) {
+        orderedFilters = order.split("\\s+");
+      }
+      HashMap<String, HtmlParseFilter> filterMap = new HashMap<String, HtmlParseFilter>();
+      try {
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            HtmlParseFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
+        Extension[] extensions = point.getExtensions();
+        for (int i = 0; i < extensions.length; i++) {
+          Extension extension = extensions[i];
+          HtmlParseFilter parseFilter = (HtmlParseFilter) extension
+              .getExtensionInstance();
+          if (!filterMap.containsKey(parseFilter.getClass().getName())) {
+            filterMap.put(parseFilter.getClass().getName(), parseFilter);
           }
-            HashMap<String, HtmlParseFilter> filterMap =
-              new HashMap<String, HtmlParseFilter>();
-            try {
-                ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(HtmlParseFilter.X_POINT_ID);
-                if (point == null)
-                    throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
-                Extension[] extensions = point.getExtensions();
-                for (int i = 0; i < extensions.length; i++) {
-                    Extension extension = extensions[i];
-                    HtmlParseFilter parseFilter = (HtmlParseFilter) extension.getExtensionInstance();
-                    if (!filterMap.containsKey(parseFilter.getClass().getName())) {
-                        filterMap.put(parseFilter.getClass().getName(), parseFilter);
-                    }
-                }
-                HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(new HtmlParseFilter[filterMap.size()]);
-                /*
-                 * If no ordered filters required, just get the filters in an
-                 * indeterminate order
-                 */
-                if (orderedFilters == null) {
-                  objectCache.setObject(HtmlParseFilter.class.getName(), htmlParseFilters);
-                }
-                /* Otherwise run the filters in the required order */
-                else {
-                  ArrayList<HtmlParseFilter> filters = new ArrayList<HtmlParseFilter>();
-                  for (int i = 0; i < orderedFilters.length; i++) {
-                    HtmlParseFilter filter = filterMap
-                        .get(orderedFilters[i]);
-                    if (filter != null) {
-                      filters.add(filter);
-                    }
-                  }
-                  objectCache.setObject(HtmlParseFilter.class.getName(), filters
-                      .toArray(new HtmlParseFilter[filters.size()]));
-                }
-            } catch (PluginRuntimeException e) {
-                throw new RuntimeException(e);
+        }
+        HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(
+            new HtmlParseFilter[filterMap.size()]);
+        /*
+         * If no ordered filters required, just get the filters in an
+         * indeterminate order
+         */
+        if (orderedFilters == null) {
+          objectCache.setObject(HtmlParseFilter.class.getName(),
+              htmlParseFilters);
+        }
+        /* Otherwise run the filters in the required order */
+        else {
+          ArrayList<HtmlParseFilter> filters = new ArrayList<HtmlParseFilter>();
+          for (int i = 0; i < orderedFilters.length; i++) {
+            HtmlParseFilter filter = filterMap.get(orderedFilters[i]);
+            if (filter != null) {
+              filters.add(filter);
             }
-            this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
+          }
+          objectCache.setObject(HtmlParseFilter.class.getName(),
+              filters.toArray(new HtmlParseFilter[filters.size()]));
         }
-    }                  
+      } catch (PluginRuntimeException e) {
+        throw new RuntimeException(e);
+      }
+      this.htmlParseFilters = (HtmlParseFilter[]) objectCache
+          .getObject(HtmlParseFilter.class.getName());
+    }
+  }
 
   /** Run all defined filters. */
-  public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc) {
+  public ParseResult filter(Content content, ParseResult parseResult,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
 
     // loop on each filter
-    for (int i = 0 ; i < this.htmlParseFilters.length; i++) {
+    for (int i = 0; i < this.htmlParseFilters.length; i++) {
       // call filter interface
-      parseResult =
-        htmlParseFilters[i].filter(content, parseResult, metaTags, doc);
+      parseResult = htmlParseFilters[i].filter(content, parseResult, metaTags,
+          doc);
 
       // any failure on parse obj, return
       if (!parseResult.isSuccess()) {
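
A sketch of constructing the filter chain configured above; the class name passed to htmlparsefilter.order is a placeholder, not a real plugin:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.parse.HtmlParseFilters;
    import org.apache.nutch.util.NutchConfiguration;

    public class HtmlParseFiltersExample {
      public static void main(String[] args) {
        Configuration conf = NutchConfiguration.create();
        // Optionally force the order in which HtmlParseFilter extensions run;
        // the class name below is a placeholder.
        conf.set(HtmlParseFilters.HTMLPARSEFILTER_ORDER,
            "org.example.MyHtmlParseFilter");
        HtmlParseFilters filters = new HtmlParseFilters(conf);
        // filters.filter(content, parseResult, metaTags, doc) then runs each
        // configured filter in order and returns early on the first
        // unsuccessful ParseResult.
        System.out.println("filters initialized: " + filters);
      }
    }
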
Index: src/java/org/apache/nutch/parse/HTMLMetaTags.java
===================================================================
--- src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/HTMLMetaTags.java	(working copy)
@@ -22,8 +22,8 @@
 import java.util.Properties;
 
 /**
- * This class holds the information about HTML "meta" tags extracted from 
- * a page. Some special tags have convenience methods for easy checking.
+ * This class holds the information about HTML "meta" tags extracted from a
+ * page. Some special tags have convenience methods for easy checking.
  */
 public class HTMLMetaTags {
   private boolean noIndex = false;
@@ -154,8 +154,8 @@
   }
 
   /**
-   * A convenience method. Returns the current value of <code>refreshTime</code>.
-   * The value may be invalid if {@link #getRefresh()}returns
+   * A convenience method. Returns the current value of <code>refreshTime</code>
+   * . The value may be invalid if {@link #getRefresh()}returns
    * <code>false</code>.
    */
   public int getRefreshTime() {
@@ -177,26 +177,22 @@
   public Properties getHttpEquivTags() {
     return httpEquivTags;
   }
-  
+
   public String toString() {
     StringBuffer sb = new StringBuffer();
-    sb.append("base=" + baseHref
-            + ", noCache=" + noCache
-            + ", noFollow=" + noFollow
-            + ", noIndex=" + noIndex
-            + ", refresh=" + refresh
-            + ", refreshHref=" + refreshHref + "\n"
-            );
+    sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow="
+        + noFollow + ", noIndex=" + noIndex + ", refresh=" + refresh
+        + ", refreshHref=" + refreshHref + "\n");
     sb.append(" * general tags:\n");
     Iterator it = generalTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + generalTags.get(key) + "\n");
     }
     sb.append(" * http-equiv tags:\n");
     it = httpEquivTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + httpEquivTags.get(key) + "\n");
     }
     return sb.toString();
Index: src/java/org/apache/nutch/parse/ParseCallable.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseCallable.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseCallable.java	(working copy)
@@ -24,7 +24,7 @@
 class ParseCallable implements Callable<ParseResult> {
   private Parser p;
   private Content content;
-  
+
   public ParseCallable(Parser p, Content content) {
     this.p = p;
     this.content = content;
@@ -33,5 +33,5 @@
   @Override
   public ParseResult call() throws Exception {
     return p.getParse(content);
-  }    
+  }
 }
\ No newline at end of file
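
ParseCallable just wraps Parser.getParse in a Callable; below is a sketch of the timeout pattern such a wrapper enables (the helper class and its signature are hypothetical, not Nutch API):

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;

    import org.apache.nutch.parse.ParseResult;
    import org.apache.nutch.parse.Parser;
    import org.apache.nutch.protocol.Content;

    public class ParseWithTimeout {
      // Runs a parser with a time limit; 'parser' and 'content' are assumed to
      // come from ParserFactory and the fetcher respectively.
      public static ParseResult parse(final Parser parser, final Content content,
          long timeoutSeconds) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
          Future<ParseResult> task = executor.submit(new Callable<ParseResult>() {
            public ParseResult call() throws Exception {
              return parser.getParse(content);
            }
          });
          return task.get(timeoutSeconds, TimeUnit.SECONDS); // TimeoutException on overrun
        } finally {
          executor.shutdownNow();
        }
      }
    }
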
Index: src/java/org/apache/nutch/parse/Parser.java
===================================================================
--- src/java/org/apache/nutch/parse/Parser.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/Parser.java	(working copy)
@@ -24,33 +24,35 @@
 import org.apache.nutch.plugin.Pluggable;
 import org.apache.nutch.protocol.Content;
 
-/** A parser for content generated by a {@link org.apache.nutch.protocol.Protocol}
- * implementation.  This interface is implemented by extensions.  Nutch's core
- * contains no page parsing code.
+/**
+ * A parser for content generated by a
+ * {@link org.apache.nutch.protocol.Protocol} implementation. This interface is
+ * implemented by extensions. Nutch's core contains no page parsing code.
  */
 public interface Parser extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Parser.class.getName();
 
-  /** 
+  /**
    * <p>
-   * This method parses the given content and returns a map of
-   * &lt;key, parse&gt; pairs. {@link Parse} instances will be persisted 
-   * under the given key.
+   * This method parses the given content and returns a map of &lt;key,
+   * parse&gt; pairs. {@link Parse} instances will be persisted under the given
+   * key.
    * </p>
    * <p>
-   * Note: Meta-redirects should be followed only when they are coming from
-   * the original URL. That is: <br> 
+   * Note: Meta-redirects should be followed only when they are coming from the
+   * original URL. That is: <br>
    * Assume fetcher is in parsing mode and is currently processing
-   * foo.bar.com/redirect.html. If this url contains a meta redirect
-   * to another url, fetcher should only follow the redirect if the map
-   * contains an entry of the form &lt;"foo.bar.com/redirect.html", 
-   * {@link Parse} with a {@link ParseStatus} indicating the redirect&gt;.
+   * foo.bar.com/redirect.html. If this url contains a meta redirect to another
+   * url, fetcher should only follow the redirect if the map contains an entry
+   * of the form &lt;"foo.bar.com/redirect.html", {@link Parse} with a
+   * {@link ParseStatus} indicating the redirect&gt;.
    * </p>
    * 
-   * @param c Content to be parsed
+   * @param c
+   *          Content to be parsed
    * @return a map containing &lt;key, parse&gt; pairs
    * @since NUTCH-443
    */
-   ParseResult getParse(Content c);
+  ParseResult getParse(Content c);
 }
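
For context, a minimal sketch of a Parser extension honoring the contract above; it is illustrative only, and a real plugin would also be declared in its plugin.xml and enabled via plugin.includes:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.metadata.Metadata;
    import org.apache.nutch.parse.Outlink;
    import org.apache.nutch.parse.ParseData;
    import org.apache.nutch.parse.ParseImpl;
    import org.apache.nutch.parse.ParseResult;
    import org.apache.nutch.parse.ParseStatus;
    import org.apache.nutch.parse.Parser;
    import org.apache.nutch.protocol.Content;

    public class TrivialTextParser implements Parser {
      private Configuration conf;

      public ParseResult getParse(Content content) {
        // Naive byte-to-string conversion, good enough for a sketch.
        String text = new String(content.getContent());
        ParseData data = new ParseData(ParseStatus.STATUS_SUCCESS, "",
            new Outlink[0], new Metadata(), new Metadata());
        // Persist the Parse under the content's own URL, as required above.
        return ParseResult.createParseResult(content.getUrl(),
            new ParseImpl(text, data));
      }

      public void setConf(Configuration conf) {
        this.conf = conf;
      }

      public Configuration getConf() {
        return conf;
      }
    }
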
Index: src/java/org/apache/nutch/parse/ParseImpl.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseImpl.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseImpl.java	(working copy)
@@ -20,8 +20,9 @@
 import java.io.*;
 import org.apache.hadoop.io.*;
 
-
-/** The result of parsing a page's raw content.
+/**
+ * The result of parsing a page's raw content.
+ * 
  * @see Parser#getParse(Content)
  */
 public class ParseImpl implements Parse, Writable {
@@ -29,7 +30,8 @@
   private ParseData data;
   private boolean isCanonical;
 
-  public ParseImpl() {}
+  public ParseImpl() {
+  }
 
   public ParseImpl(Parse parse) {
     this(new ParseText(parse.getText()), parse.getData(), true);
@@ -38,7 +40,7 @@
   public ParseImpl(String text, ParseData data) {
     this(new ParseText(text), data, true);
   }
-  
+
   public ParseImpl(ParseText text, ParseData data) {
     this(text, data, true);
   }
@@ -49,12 +51,18 @@
     this.isCanonical = isCanonical;
   }
 
-  public String getText() { return text.getText(); }
+  public String getText() {
+    return text.getText();
+  }
 
-  public ParseData getData() { return data; }
+  public ParseData getData() {
+    return data;
+  }
 
-  public boolean isCanonical() { return isCanonical; }
-  
+  public boolean isCanonical() {
+    return isCanonical;
+  }
+
   public final void write(DataOutput out) throws IOException {
     out.writeBoolean(isCanonical);
     text.write(out);
Index: src/java/org/apache/nutch/parse/ParseSegment.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseSegment.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseSegment.java	(working copy)
@@ -44,13 +44,13 @@
     Reducer<Text, Writable, Text, Writable> {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParseSegment.class);
-  
+
   private ScoringFilters scfilters;
-  
+
   public ParseSegment() {
     this(null);
   }
-  
+
   public ParseSegment(Configuration conf) {
     super(conf);
   }
@@ -60,21 +60,22 @@
     this.scfilters = new ScoringFilters(job);
   }
 
-  public void close() {}
-  
+  public void close() {
+  }
+
   private Text newKey = new Text();
 
   public void map(WritableComparable key, Content content,
-                  OutputCollector<Text, ParseImpl> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, ParseImpl> output, Reporter reporter)
+      throws IOException {
     // convert on the fly from old UTF8 keys
     if (key instanceof UTF8) {
       newKey.set(key.toString());
       key = newKey;
     }
-    
-    int status =
-      Integer.parseInt(content.getMetadata().get(Nutch.FETCH_STATUS_KEY));
+
+    int status = Integer.parseInt(content.getMetadata().get(
+        Nutch.FETCH_STATUS_KEY));
     if (status != CrawlDatum.STATUS_FETCH_SUCCESS) {
       // content not fetched successfully, skip document
       LOG.debug("Skipping " + key + " as content is not fetched successfully");
@@ -85,7 +86,8 @@
     try {
       parseResult = new ParseUtil(getConf()).parse(content);
     } catch (Exception e) {
-      LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException(e));
+      LOG.warn("Error parsing: " + key + ": "
+          + StringUtils.stringifyException(e));
       return;
     }
 
@@ -95,7 +97,8 @@
       ParseStatus parseStatus = parse.getData().getStatus();
 
       LOG.info("Parsing: " + url);
-      reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[parseStatus.getMajorCode()], 1);
+      reporter.incrCounter("ParserStatus",
+          ParseStatus.majorCodes[parseStatus.getMajorCode()], 1);
 
       if (!parseStatus.isSuccess()) {
         LOG.warn("Error parsing: " + key + ": " + parseStatus);
@@ -103,32 +106,34 @@
       }
 
       // pass segment name to parse data
-      parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, 
-                                           getConf().get(Nutch.SEGMENT_NAME_KEY));
+      parse.getData().getContentMeta()
+          .set(Nutch.SEGMENT_NAME_KEY, getConf().get(Nutch.SEGMENT_NAME_KEY));
 
       // compute the new signature
-      byte[] signature = 
-        SignatureFactory.getSignature(getConf()).calculate(content, parse); 
-      parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, 
-          StringUtil.toHexString(signature));
-      
+      byte[] signature = SignatureFactory.getSignature(getConf()).calculate(
+          content, parse);
+      parse.getData().getContentMeta()
+          .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
+
       try {
         scfilters.passScoreAfterParsing(url, content, parse);
       } catch (ScoringFilterException e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
-          LOG.warn("Error passing score: "+ url +": "+e.getMessage());
+          LOG.warn("Error passing score: " + url + ": " + e.getMessage());
         }
       }
-      output.collect(url, new ParseImpl(new ParseText(parse.getText()), 
-                                        parse.getData(), parse.isCanonical()));
+      output.collect(
+          url,
+          new ParseImpl(new ParseText(parse.getText()), parse.getData(), parse
+              .isCanonical()));
     }
   }
 
   public void reduce(Text key, Iterator<Writable> values,
-                     OutputCollector<Text, Writable> output, Reporter reporter)
-    throws IOException {
-    output.collect(key, (Writable)values.next()); // collect first value
+      OutputCollector<Text, Writable> output, Reporter reporter)
+      throws IOException {
+    output.collect(key, (Writable) values.next()); // collect first value
   }
 
   public void parse(Path segment) throws IOException {
@@ -148,7 +153,7 @@
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setMapperClass(ParseSegment.class);
     job.setReducerClass(ParseSegment.class);
-    
+
     FileOutputFormat.setOutputPath(job, segment);
     job.setOutputFormat(ParseOutputFormat.class);
     job.setOutputKeyClass(Text.class);
@@ -156,15 +161,16 @@
 
     JobClient.runJob(job);
     long end = System.currentTimeMillis();
-    LOG.info("ParseSegment: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ParseSegment: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-
   public static void main(String[] args) throws Exception {
-	int res = ToolRunner.run(NutchConfiguration.create(), new ParseSegment(), args);
-	System.exit(res);
+    int res = ToolRunner.run(NutchConfiguration.create(), new ParseSegment(),
+        args);
+    System.exit(res);
   }
-	  
+
   public int run(String[] args) throws Exception {
     Path segment;
 
@@ -173,7 +179,7 @@
     if (args.length == 0) {
       System.err.println(usage);
       System.exit(-1);
-    }      
+    }
     segment = new Path(args[0]);
     parse(segment);
     return 0;
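
A sketch of launching the parse job above programmatically; the segment path is a placeholder:

    import org.apache.hadoop.util.ToolRunner;
    import org.apache.nutch.parse.ParseSegment;
    import org.apache.nutch.util.NutchConfiguration;

    public class RunParseSegment {
      public static void main(String[] args) throws Exception {
        // Equivalent to "bin/nutch parse crawl/segments/<segment>": parses the
        // fetched content of one segment directory.
        int res = ToolRunner.run(NutchConfiguration.create(), new ParseSegment(),
            new String[] { "crawl/segments/20111031123456" });
        System.exit(res);
      }
    }
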
Index: src/java/org/apache/nutch/util/SuffixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/SuffixStringMatcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/SuffixStringMatcher.java	(working copy)
@@ -21,8 +21,8 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of suffixes.  Zero-length <code>Strings</code> are ignored.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * suffixes. Zero-length <code>Strings</code> are ignored.
  */
 public class SuffixStringMatcher extends TrieStringMatcher {
 
@@ -32,7 +32,7 @@
    */
   public SuffixStringMatcher(String[] suffixes) {
     super();
-    for (int i= 0; i < suffixes.length; i++)
+    for (int i = 0; i < suffixes.length; i++)
       addPatternBackward(suffixes[i]);
   }
 
@@ -43,20 +43,20 @@
    */
   public SuffixStringMatcher(Collection suffixes) {
     super();
-    Iterator iter= suffixes.iterator();
+    Iterator iter = suffixes.iterator();
     while (iter.hasNext())
-      addPatternBackward((String)iter.next());
+      addPatternBackward((String) iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * suffix in the trie
+   * Returns true if the given <code>String</code> is matched by a suffix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -64,16 +64,15 @@
     return false;
   }
 
-
   /**
    * Returns the shortest suffix of <code>input<code> that is matched,
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
         return input.substring(i);
@@ -86,29 +85,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(i);
+        result = input.substring(i);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    SuffixStringMatcher matcher= 
-      new SuffixStringMatcher( 
-        new String[] 
-        {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} );
+    SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a",
+        "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
-                    "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                    "kite", };
+    String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -26,37 +26,41 @@
 
 /** Utility class for URL analysis */
 public class URLUtil {
-  
+
   /**
-   * Resolve relative URL-s and fix a few java.net.URL errors
-   * in handling of URLs with embedded params and pure query
-   * targets.
-   * @param base base url
-   * @param target target url (may be relative)
+   * Resolve relative URL-s and fix a few java.net.URL errors in handling of
+   * URLs with embedded params and pure query targets.
+   * 
+   * @param base
+   *          base url
+   * @param target
+   *          target url (may be relative)
    * @return resolved absolute url.
    * @throws MalformedURLException
    */
   public static URL resolveURL(URL base, String target)
-          throws MalformedURLException {
+      throws MalformedURLException {
     target = target.trim();
 
-    /* this is probably not needed anymore - see NUTCH-797.
-    // handle params that are embedded into the base url - move them to target
-    // so URL class constructs the new url class properly
-    if (base.toString().indexOf(';') > 0)
-      return fixEmbeddedParams(base, target);
-    */
-    
+    /*
+     * this is probably not needed anymore - see NUTCH-797.
+     * // handle params that are embedded into the base url - move them to
+     * // target so URL class constructs the new url class properly
+     * if (base.toString().indexOf(';') > 0) return fixEmbeddedParams(base, target);
+     */
+
     // handle the case that there is a target that is a pure query,
     // for example
     // http://careers3.accenture.com/Careers/ASPX/Search.aspx?co=0&sk=0
     // It has urls in the page of the form href="?co=0&sk=0&pg=1", and by
     // default
     // URL constructs the base+target combo as
-    // http://careers3.accenture.com/Careers/ASPX/?co=0&sk=0&pg=1, incorrectly
+    // http://careers3.accenture.com/Careers/ASPX/?co=0&sk=0&pg=1,
+    // incorrectly
     // dropping the Search.aspx target
     //
-    // Browsers handle these just fine, they must have an exception similar to
+    // Browsers handle these just fine, they must have an exception similar
+    // to
     // this
     if (target.startsWith("?")) {
       return fixPureQueryTargets(base, target);
@@ -66,9 +70,10 @@
   }
 
   /** Handle the case in RFC3986 section 5.4.1 example 7, and similar. */
-   static URL fixPureQueryTargets(URL base, String target)
-          throws MalformedURLException {
-    if (!target.startsWith("?")) return new URL(base, target);
+  static URL fixPureQueryTargets(URL base, String target)
+      throws MalformedURLException {
+    if (!target.startsWith("?"))
+      return new URL(base, target);
 
     String basePath = base.getPath();
     String baseRightMost = "";
@@ -77,7 +82,8 @@
       baseRightMost = basePath.substring(baseRightMostIdx + 1);
     }
 
-    if (target.startsWith("?")) target = baseRightMost + target;
+    if (target.startsWith("?"))
+      target = baseRightMost + target;
 
     return new URL(base, target);
   }
@@ -103,7 +109,7 @@
    *           If the url is not a well formed URL.
    */
   private static URL fixEmbeddedParams(URL base, String target)
-          throws MalformedURLException {
+      throws MalformedURLException {
 
     // the target contains params information or the base doesn't then no
     // conversion necessary, return regular URL
@@ -116,12 +122,14 @@
     int startParams = baseURL.indexOf(';');
     String params = baseURL.substring(startParams);
 
-    // if the target has a query string then put the params information after
-    // any path but before the query string, otherwise just append to the path
+    // if the target has a query string then put the params information
+    // after
+    // any path but before the query string, otherwise just append to the
+    // path
     int startQS = target.indexOf('?');
     if (startQS >= 0) {
       target = target.substring(0, startQS) + params
-              + target.substring(startQS);
+          + target.substring(startQS);
     } else {
       target += params;
     }
@@ -129,125 +137,142 @@
     return new URL(base, target);
   }
 
-  private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
+  private static Pattern IP_PATTERN = Pattern
+      .compile("(\\d{1,3}\\.){3}(\\d{1,3})");
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new URL(http://lucene.apache.org/))
    *  </code><br>
-   *  will return <br><code> apache.org</code>
-   *   */
+   * will return <br>
+   * <code> apache.org</code>
+   * */
   public static String getDomainName(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    //it seems that java returns hostnames ending with .
-    if(host.endsWith("."))
+    // it seems that java returns hostnames ending with .
+    if (host.endsWith("."))
       host = host.substring(0, host.length() - 1);
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return host;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1); 
-      if(tlds.isDomainSuffix(subCandidate)) {
-        return candidate; 
+      String subCandidate = candidate.substring(index + 1);
+      if (tlds.isDomainSuffix(subCandidate)) {
+        return candidate;
       }
       candidate = subCandidate;
     }
     return candidate;
   }
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new http://lucene.apache.org/)
    *  </code><br>
-   *  will return <br><code> apache.org</code>
+   * will return <br>
+   * <code> apache.org</code>
+   * 
    * @throws MalformedURLException
    */
   public static String getDomainName(String url) throws MalformedURLException {
     return getDomainName(new URL(url));
   }
 
-  /** Returns whether the given urls have the same domain name.
-   * As an example, <br>
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
    * <code> isSameDomain(new URL("http://lucene.apache.org")
    * , new URL("http://people.apache.org/"))
    * <br> will return true. </code>
-   *
+   * 
    * @return true if the domain names are equal
    */
   public static boolean isSameDomainName(URL url1, URL url2) {
     return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
   }
 
-  /**Returns whether the given urls have the same domain name.
-  * As an example, <br>
-  * <code> isSameDomain("http://lucene.apache.org"
-  * ,"http://people.apache.org/")
-  * <br> will return true. </code>
-  * @return true if the domain names are equal
-  * @throws MalformedURLException
-  */
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
+   * <code> isSameDomain("http://lucene.apache.org"
+   * ,"http://people.apache.org/")
+   * <br> will return true. </code>
+   * 
+   * @return true if the domain names are equal
+   * @throws MalformedURLException
+   */
   public static boolean isSameDomainName(String url1, String url2)
-    throws MalformedURLException {
+      throws MalformedURLException {
     return isSameDomainName(new URL(url1), new URL(url2));
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
   public static DomainSuffix getDomainSuffix(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return null;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1);
+      String subCandidate = candidate.substring(index + 1);
       DomainSuffix d = tlds.get(subCandidate);
-      if(d != null) {
-        return d; 
+      if (d != null) {
+        return d;
       }
       candidate = subCandidate;
     }
     return null;
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
-  public static DomainSuffix getDomainSuffix(String url) throws MalformedURLException {
+  public static DomainSuffix getDomainSuffix(String url)
+      throws MalformedURLException {
     return getDomainSuffix(new URL(url));
   }
 
-  /** Partitions of the hostname of the url by "."  */
+  /** Partitions of the hostname of the url by "." */
   public static String[] getHostSegments(URL url) {
     String host = url.getHost();
-    //return whole hostname, if it is an ipv4
-    //TODO : handle ipv6
-    if(IP_PATTERN.matcher(host).matches())
-      return new String[] {host};
+    // return whole hostname, if it is an ipv4
+    // TODO : handle ipv6
+    if (IP_PATTERN.matcher(host).matches())
+      return new String[] { host };
     return host.split("\\.");
   }
 
-  /** Partitions of the hostname of the url by "."
-   * @throws MalformedURLException */
-  public static String[] getHostSegments(String url) throws MalformedURLException {
-   return getHostSegments(new URL(url));
+  /**
+   * Partitions of the hostname of the url by "."
+   * 
+   * @throws MalformedURLException
+   */
+  public static String[] getHostSegments(String url)
+      throws MalformedURLException {
+    return getHostSegments(new URL(url));
   }
 
   /**
-   * <p>Given two urls, a src and a destination of a redirect, it returns the 
-   * representative url.<p>
+   * <p>
+   * Given two urls, a src and a destination of a redirect, it returns the
+   * representative url.
+   * <p>
    * 
-   * <p>This method implements an extended version of the algorithm used by the
+   * <p>
+   * This method implements an extended version of the algorithm used by the
    * Yahoo! Slurp crawler described here:<br>
    * <a href=
    * "http://help.yahoo.com/l/nz/yahooxtra/search/webcrawler/slurp-11.html"> How
@@ -255,46 +280,63 @@
    * <br>
    * <ol>
    * <li>Choose target url if either url is malformed.</li>
-   * <li>If different domains the keep the destination whether or not the 
+   * <li>If different domains the keep the destination whether or not the
    * redirect is temp or perm</li>
-   * <ul><li>a.com -> b.com*</li></ul>
+   * <ul>
+   * <li>a.com -> b.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and the source is root, keep the source.</li>
-   * <ul><li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li></ul>
-   * <li>If the redirect is permanent and the source is not root and the 
+   * <ul>
+   * <li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li>
+   * </ul>
+   * <li>If the redirect is permanent and the source is not root and the
    * destination is root, keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and neither the source nor the destination
    * is root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com/abc/page.html*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
+   * </ul>
    * <li>If the redirect is temporary and source is root and destination is not
    * root, then keep the source</li>
-   * <ul><li>*a.com -> a.com/xyz/index.html</li></ul>
+   * <ul>
+   * <li>*a.com -> a.com/xyz/index.html</li>
+   * </ul>
    * <li>If the redirect is temporary and source is not root and destination is
    * root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is temporary and neither the source or the destination
-   * is root, then keep the shortest url.  First check for the shortest host,
-   * and if both are equal then check by path.  Path is first by length then by
-   * the number of / path separators.</li>
+   * is root, then keep the shortest url. First check for the shortest host, and
+   * if both are equal then check by path. Path is first by length then by the
+   * number of / path separators.</li>
    * <ul>
    * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
    * <li>*www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html</li>
    * </ul>
    * <li>If the redirect is temporary and both the source and the destination
    * are root, then keep the shortest sub-domain</li>
-   * <ul><li>*www.a.com -> www.news.a.com</li></ul>
+   * <ul>
+   * <li>*www.a.com -> www.news.a.com</li>
+   * </ul>
    * <br>
-   * While not in this logic there is a further piece of representative url 
-   * logic that occurs during indexing and after scoring.  During creation of 
-   * the basic fields before indexing, if a url has a representative url stored
-   * we check both the url and its representative url (which should never be 
-   * the same) against their linkrank scores and the highest scoring one is 
-   * kept as the url and the lower scoring one is held as the orig url inside 
-   * of the index.
+   * While not in this logic there is a further piece of representative url
+   * logic that occurs during indexing and after scoring. During creation of the
+   * basic fields before indexing, if a url has a representative url stored we
+   * check both the url and its representative url (which should never be the
+   * same) against their linkrank scores and the highest scoring one is kept as
+   * the url and the lower scoring one is held as the orig url inside of the
+   * index.
    * 
-   * @param src The source url.
-   * @param dst The destination url.
-   * @param temp Is the redirect a temporary redirect.
+   * @param src
+   *          The source url.
+   * @param dst
+   *          The destination url.
+   * @param temp
+   *          Is the redirect a temporary redirect.
    * 
    * @return String The representative url.
    */
@@ -306,8 +348,7 @@
     try {
       srcUrl = new URL(src);
       dstUrl = new URL(dst);
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return dst;
     }
 
@@ -325,27 +366,28 @@
 
     // 1) different domain them keep dest, temp or perm
     // a.com -> b.com*
-    //    
+    //
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
-    //      
+    //
     // 3) permanent and not root and dest root, keep dest
     // a.com/xyz/index.html -> a.com*
-    //      
+    //
     // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
-    //      
+    //
     // 5) temp and root and dest not root keep src
     // *a.com -> a.com/xyz/index.html
-    //  
+    //
     // 7) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
-    //  
-    // 8) temp and neither root, keep shortest, if hosts equal by path else by
+    //
+    // 8) temp and neither root, keep shortest, if hosts equal by path else
+    // by
     // hosts. paths are first by length then by number of / separators
     // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
-    //  
+    //
     // 9) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
 
@@ -357,39 +399,35 @@
 
     // if it is a permanent redirect
     if (!temp) {
-      
+
       // if source is root return source, otherwise destination
       if (srcRoot) {
         return src;
-      }
-      else {
+      } else {
         return dst;
       }
-    }
-    else { // temporary redirect
+    } else { // temporary redirect
 
       // source root and destination not root
       if (srcRoot && !destRoot) {
         return src;
-      }
-      else if (!srcRoot && destRoot) { // destination root and source not
+      } else if (!srcRoot && destRoot) { // destination root and source
+        // not
         return dst;
-      }
-      else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
+      } else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
 
-        // source and destination hosts are the same, check paths, host length
+        // source and destination hosts are the same, check paths, host
+        // length
         int numSrcPaths = srcFile.split("/").length;
         int numDstPaths = dstFile.split("/").length;
         if (numSrcPaths != numDstPaths) {
           return (numDstPaths < numSrcPaths ? dst : src);
-        }
-        else {
+        } else {
           int srcPathLength = srcFile.length();
           int dstPathLength = dstFile.length();
           return (dstPathLength < srcPathLength ? dst : src);
         }
-      }
-      else {
+      } else {
 
         // different host names and both root take the shortest
         int numSrcSubs = srcHost.split("\\.").length;
@@ -403,51 +441,51 @@
    * Returns the lowercased hostname for the url or null if the url is not well
    * formed.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The hostname for the url.
    */
   public static String getHost(String url) {
     try {
       return new URL(url).getHost().toLowerCase();
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
 
   /**
-   * Returns the page for the url.  The page consists of the protocol, host,
-   * and path, but does not include the query string.  The host is lowercased
-   * but the path is not.
+   * Returns the page for the url. The page consists of the protocol, host, and
+   * path, but does not include the query string. The host is lowercased but the
+   * path is not.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The page for the url.
    */
   public static String getPage(String url) {
     try {
-      // get the full url, and replace the query string with and empty string
+      // get the full url, and replace the query string with an empty
+      // string
       url = url.toLowerCase();
       String queryStr = new URL(url).getQuery();
       return (queryStr != null) ? url.replace("?" + queryStr, "") : url;
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
-  
+
   /** For testing */
-  public static void main(String[] args){
-    
-    if(args.length!=1) {
+  public static void main(String[] args) {
+
+    if (args.length != 1) {
       System.err.println("Usage : URLUtil <url>");
-      return ;
+      return;
     }
-    
+
     String url = args[0];
     try {
       System.out.println(URLUtil.getDomainName(new URL(url)));
-    }
-    catch (MalformedURLException ex) {
+    } catch (MalformedURLException ex) {
       ex.printStackTrace();
     }
   }
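To make the documented behavior concrete, a minimal sketch using the example URLs from the Javadoc and comments above (the expected values are those the documentation describes):

    import java.net.URL;
    import org.apache.nutch.util.URLUtil;

    public class URLUtilExample {
      public static void main(String[] args) throws Exception {
        // pure query target: Search.aspx is kept rather than dropped
        URL base = new URL("http://careers3.accenture.com/Careers/ASPX/Search.aspx?co=0&sk=0");
        System.out.println(URLUtil.resolveURL(base, "?co=0&sk=0&pg=1"));

        // prints "apache.org" -- subdomains are stripped down to the domain name
        System.out.println(URLUtil.getDomainName(new URL("http://lucene.apache.org/")));

        // true -- both hosts share the domain name apache.org
        System.out.println(URLUtil.isSameDomainName("http://lucene.apache.org",
            "http://people.apache.org/"));
      }
    }
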
Index: src/java/org/apache/nutch/util/HadoopFSUtil.java
===================================================================
--- src/java/org/apache/nutch/util/HadoopFSUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/HadoopFSUtil.java	(working copy)
@@ -25,48 +25,48 @@
 
 public class HadoopFSUtil {
 
-    /**
-     * Returns PathFilter that passes all paths through.
-     */
-    public static PathFilter getPassAllFilter() {
-        return new PathFilter() {
-            public boolean accept(Path arg0) {
-                return true;
-            }
-        };
-    }
+  /**
+   * Returns PathFilter that passes all paths through.
+   */
+  public static PathFilter getPassAllFilter() {
+    return new PathFilter() {
+      public boolean accept(Path arg0) {
+        return true;
+      }
+    };
+  }
 
-    /**
-     * Returns PathFilter that passes directories through.
-     */
-    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
-        return new PathFilter() {
-            public boolean accept(final Path path) {
-                try {
-                    return fs.getFileStatus(path).isDir();
-                } catch (IOException ioe) {
-                    return false;
-                }
-            }
+  /**
+   * Returns PathFilter that passes directories through.
+   */
+  public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+    return new PathFilter() {
+      public boolean accept(final Path path) {
+        try {
+          return fs.getFileStatus(path).isDir();
+        } catch (IOException ioe) {
+          return false;
+        }
+      }
 
-        };
+    };
+  }
+
+  /**
+   * Turns an array of FileStatus into an array of Paths.
+   */
+  public static Path[] getPaths(FileStatus[] stats) {
+    if (stats == null) {
+      return null;
     }
-    
-    /**
-     * Turns an array of FileStatus into an array of Paths.
-     */
-    public static Path[] getPaths(FileStatus[] stats) {
-      if (stats == null) {
-        return null;
-      }
-      if (stats.length == 0) {
-        return new Path[0];
-      }
-      Path[] res = new Path[stats.length];
-      for (int i = 0; i < stats.length; i++) {
-        res[i] = stats[i].getPath();
-      }
-      return res;
+    if (stats.length == 0) {
+      return new Path[0];
     }
+    Path[] res = new Path[stats.length];
+    for (int i = 0; i < stats.length; i++) {
+      res[i] = stats[i].getPath();
+    }
+    return res;
+  }
 
 }
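A typical use of these helpers is listing the sub-directories of a segments directory and unwrapping the statuses into Paths. A minimal sketch against the old FileSystem API used throughout this patch (the directory name is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.nutch.util.HadoopFSUtil;

    public class HadoopFSUtilExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // only directories pass the filter; getPaths() unwraps the FileStatus array
        FileStatus[] stats = fs.listStatus(new Path("crawl/segments"),
            HadoopFSUtil.getPassDirectoriesFilter(fs));
        Path[] segments = HadoopFSUtil.getPaths(stats);
        if (segments != null) {
          for (Path segment : segments) {
            System.out.println(segment);
          }
        }
      }
    }
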
Index: src/java/org/apache/nutch/util/StringUtil.java
===================================================================
--- src/java/org/apache/nutch/util/StringUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/StringUtil.java	(working copy)
@@ -18,42 +18,42 @@
 package org.apache.nutch.util;
 
 /**
- * A collection of String processing utility methods. 
+ * A collection of String processing utility methods.
  */
 public class StringUtil {
 
   /**
-   * Returns a copy of <code>s</code> padded with trailing spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with trailing spaces so that it's
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String rightPad(String s, int length) {
-    StringBuffer sb= new StringBuffer(s);
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer(s);
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     return sb.toString();
   }
 
   /**
-   * Returns a copy of <code>s</code> padded with leading spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with leading spaces so that it's
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String leftPad(String s, int length) {
-    StringBuffer sb= new StringBuffer();
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer();
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     sb.append(s);
     return sb.toString();
   }
 
+  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
-
   /**
    * Convenience call for {@link #toHexString(byte[], String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(byte[] buf) {
@@ -63,37 +63,48 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(byte[] buf, String sep, int lineLen) {
-    if (buf == null) return null;
-    if (lineLen <= 0) lineLen = Integer.MAX_VALUE;
+    if (buf == null)
+      return null;
+    if (lineLen <= 0)
+      lineLen = Integer.MAX_VALUE;
     StringBuffer res = new StringBuffer(buf.length * 2);
     for (int i = 0; i < buf.length; i++) {
       int b = buf[i];
       res.append(HEX_DIGITS[(b >> 4) & 0xf]);
       res.append(HEX_DIGITS[b & 0xf]);
-      if (i > 0 && (i % lineLen) == 0) res.append('\n');
-      else if (sep != null && i < lineLen - 1) res.append(sep); 
+      if (i > 0 && (i % lineLen) == 0)
+        res.append('\n');
+      else if (sep != null && i < lineLen - 1)
+        res.append(sep);
     }
     return res.toString();
   }
-  
+
   /**
    * Convert a String containing consecutive (no inside whitespace) hexadecimal
-   * digits into a corresponding byte array. If the number of digits is not even,
-   * a '0' will be appended in the front of the String prior to conversion.
-   * Leading and trailing whitespace is ignored.
-   * @param text input text
+   * digits into a corresponding byte array. If the number of digits is not
+   * even, a '0' will be prepended to the front of the String prior to
+   * conversion. Leading and trailing whitespace is ignored.
+   * 
+   * @param text
+   *          input text
    * @return converted byte array, or null if unable to convert
    */
   public static byte[] fromHexString(String text) {
     text = text.trim();
-    if (text.length() % 2 != 0) text = "0" + text;
+    if (text.length() % 2 != 0)
+      text = "0" + text;
     int resLen = text.length() / 2;
     int loNibble, hiNibble;
     byte[] res = new byte[resLen];
@@ -101,12 +112,13 @@
       int j = i << 1;
       hiNibble = charToNibble(text.charAt(j));
       loNibble = charToNibble(text.charAt(j + 1));
-      if (loNibble == -1 || hiNibble == -1) return null;
-      res[i] = (byte)(hiNibble << 4 | loNibble);
+      if (loNibble == -1 || hiNibble == -1)
+        return null;
+      res[i] = (byte) (hiNibble << 4 | loNibble);
     }
     return res;
   }
-  
+
   private static final int charToNibble(char c) {
     if (c >= '0' && c <= '9') {
       return c - '0';
@@ -129,8 +141,8 @@
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
-    else 
-      System.out.println(args[0] + " is resolved to " +
-                         EncodingDetector.resolveEncodingAlias(args[0]));
+    else
+      System.out.println(args[0] + " is resolved to "
+          + EncodingDetector.resolveEncodingAlias(args[0]));
   }
 }
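The hex helpers above are used elsewhere in this patch for page signatures. A minimal expression-level sketch of the round trip (values chosen for illustration):

    byte[] sig = new byte[] { (byte) 0xde, (byte) 0xad, (byte) 0xbe, (byte) 0xef };
    String hex = StringUtil.toHexString(sig);       // "deadbeef"
    byte[] back = StringUtil.fromHexString(hex);    // the original four bytes
    byte[] odd = StringUtil.fromHexString("abc");   // treated as "0abc" -> { 0x0a, (byte) 0xbc }
    String padded = StringUtil.rightPad("id", 8);   // "id" followed by six spaces
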
Index: src/java/org/apache/nutch/util/CommandRunner.java
===================================================================
--- src/java/org/apache/nutch/util/CommandRunner.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/CommandRunner.java	(working copy)
@@ -82,11 +82,11 @@
   }
 
   public void evaluate() throws IOException {
-      this.exec();
+    this.exec();
   }
 
   /**
-   *
+   * 
    * @return process exit value (return code) or -1 if timed out.
    * @throws IOException
    */
@@ -94,13 +94,11 @@
     Process proc = Runtime.getRuntime().exec(_command);
     _barrier = new CyclicBarrier(3 + ((_stdin != null) ? 1 : 0));
 
-    PullerThread so =
-      new PullerThread("STDOUT", proc.getInputStream(), _stdout);
+    PullerThread so = new PullerThread("STDOUT", proc.getInputStream(), _stdout);
     so.setDaemon(true);
     so.start();
 
-    PullerThread se =
-      new PullerThread("STDERR", proc.getErrorStream(), _stderr);
+    PullerThread se = new PullerThread("STDERR", proc.getErrorStream(), _stderr);
     se.setDaemon(true);
     se.start();
 
@@ -145,11 +143,12 @@
             Thread.sleep(1000);
             _xit = proc.exitValue();
           } catch (InterruptedException ie) {
-              if (Thread.interrupted()) {
-                  break; // stop waiting on an interrupt for this thread
-              } else {
-                  continue;
-              }
+            if (Thread.interrupted()) {
+              break; // stop waiting on an interrupt for this
+              // thread
+            } else {
+              continue;
+            }
           } catch (IllegalThreadStateException iltse) {
             continue;
           }
@@ -181,11 +180,8 @@
 
     private boolean _closeInput;
 
-    protected PumperThread(
-      String name,
-      InputStream is,
-      OutputStream os,
-      boolean closeInput) {
+    protected PumperThread(String name, InputStream is, OutputStream os,
+        boolean closeInput) {
       super(name);
       _is = is;
       _os = os;
@@ -218,12 +214,12 @@
         }
       }
       try {
-         _barrier.await();
-       } catch (InterruptedException ie) {
-         /* IGNORE */
-       } catch (BrokenBarrierException bbe) {
-         /* IGNORE */
-       }
+        _barrier.await();
+      } catch (InterruptedException ie) {
+        /* IGNORE */
+      } catch (BrokenBarrierException bbe) {
+        /* IGNORE */
+      }
     }
   }
 
@@ -269,8 +265,9 @@
 
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-timeout")) {
-        timeout = Integer.parseInt(args[++i]);;
-      } else if (i != args.length-2) {
+        timeout = Integer.parseInt(args[++i]);
+        ;
+      } else if (i != args.length - 2) {
         System.err.println(usage);
         System.exit(-1);
       } else {
@@ -290,6 +287,6 @@
 
     cr.evaluate();
 
-    System.err.println("output value: "+cr.getExitValue());
+    System.err.println("output value: " + cr.getExitValue());
   }
 }
Index: src/java/org/apache/nutch/util/NutchConfiguration.java
===================================================================
--- src/java/org/apache/nutch/util/NutchConfiguration.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/NutchConfiguration.java	(working copy)
@@ -23,37 +23,42 @@
 
 import org.apache.hadoop.conf.Configuration;
 
-
-/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
- * resources.  */
+/**
+ * Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.
+ */
 public class NutchConfiguration {
   public static final String UUID_KEY = "nutch.conf.uuid";
-  
-  private NutchConfiguration() {}                 // singleton
-  
+
+  private NutchConfiguration() {
+  } // singleton
+
   /*
-   * Configuration.hashCode() doesn't return values that
-   * correspond to a unique set of parameters. This is a workaround
-   * so that we can track instances of Configuration created by Nutch.
+   * Configuration.hashCode() doesn't return values that correspond to a unique
+   * set of parameters. This is a workaround so that we can track instances of
+   * Configuration created by Nutch.
    */
   private static void setUUID(Configuration conf) {
     UUID uuid = UUID.randomUUID();
     conf.set(UUID_KEY, uuid.toString());
   }
-  
+
   /**
-   * Retrieve a Nutch UUID of this configuration object, or null
-   * if the configuration was created elsewhere.
-   * @param conf configuration instance
+   * Retrieve a Nutch UUID of this configuration object, or null if the
+   * configuration was created elsewhere.
+   * 
+   * @param conf
+   *          configuration instance
    * @return uuid or null
    */
   public static String getUUID(Configuration conf) {
     return conf.get(UUID_KEY);
   }
 
-  /** Create a {@link Configuration} for Nutch. This will load the standard
-   * Nutch resources, <code>nutch-default.xml</code> and
-   * <code>nutch-site.xml</code> overrides.
+  /**
+   * Create a {@link Configuration} for Nutch. This will load the standard Nutch
+   * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code>
+   * overrides.
    */
   public static Configuration create() {
     Configuration conf = new Configuration();
@@ -61,14 +66,19 @@
     addNutchResources(conf);
     return conf;
   }
-  
-  /** Create a {@link Configuration} from supplied properties.
-   * @param addNutchResources if true, then first <code>nutch-default.xml</code>,
-   * and then <code>nutch-site.xml</code> will be loaded prior to applying the
-   * properties. Otherwise these resources won't be used.
-   * @param nutchProperties a set of properties to define (or override)
+
+  /**
+   * Create a {@link Configuration} from supplied properties.
+   * 
+   * @param addNutchResources
+   *          if true, then first <code>nutch-default.xml</code>, and then
+   *          <code>nutch-site.xml</code> will be loaded prior to applying the
+   *          properties. Otherwise these resources won't be used.
+   * @param nutchProperties
+   *          a set of properties to define (or override)
    */
-  public static Configuration create(boolean addNutchResources, Properties nutchProperties) {
+  public static Configuration create(boolean addNutchResources,
+      Properties nutchProperties) {
     Configuration conf = new Configuration();
     setUUID(conf);
     if (addNutchResources) {
@@ -83,8 +93,8 @@
   /**
    * Add the standard Nutch resources to {@link Configuration}.
    * 
-   * @param conf               Configuration object to which
-   *                           configuration is to be added.
+   * @param conf
+   *          Configuration object to which configuration is to be added.
    */
   private static Configuration addNutchResources(Configuration conf) {
     conf.addResource("nutch-default.xml");
@@ -92,4 +102,3 @@
     return conf;
   }
 }
-
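The two create() variants above differ only in whether nutch-default.xml and nutch-site.xml are loaded before the supplied properties are applied. A minimal sketch (the property value is illustrative):

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.util.NutchConfiguration;

    public class NutchConfigurationExample {
      public static void main(String[] args) {
        Properties overrides = new Properties();
        overrides.setProperty("http.agent.name", "my-test-crawler");  // illustrative value
        // load the standard Nutch resources first, then apply the overrides
        Configuration conf = NutchConfiguration.create(true, overrides);
        // every Configuration created by Nutch carries a tracking UUID
        System.out.println(NutchConfiguration.getUUID(conf));
      }
    }
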
Index: src/java/org/apache/nutch/util/NutchJob.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJob.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/NutchJob.java	(working copy)
@@ -20,7 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 
-/** A {@link JobConf} for Nutch jobs.  */
+/** A {@link JobConf} for Nutch jobs. */
 public class NutchJob extends JobConf {
 
   public NutchJob(Configuration conf) {
@@ -28,4 +28,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/util/DomUtil.java
===================================================================
--- src/java/org/apache/nutch/util/DomUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/DomUtil.java	(working copy)
@@ -38,7 +38,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 public class DomUtil {
 
   private final static Logger LOG = LoggerFactory.getLogger(DomUtil.class);
@@ -61,10 +60,10 @@
       input.setEncoding("UTF-8");
       parser.parse(input);
       int i = 0;
-      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
-       i++;
-      } 
-      element = (Element)parser.getDocument().getChildNodes().item(i);
+      while (!(parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+        i++;
+      }
+      element = (Element) parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
       e.printStackTrace(LogUtil.getWarnStream(LOG));
     } catch (SAXException e) {
Index: src/java/org/apache/nutch/util/LogUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LogUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/LogUtil.java	(working copy)
@@ -26,10 +26,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
  * Utility class for logging.
- *
+ * 
  * @author J&eacute;r&ocirc;me Charron
  */
 public class LogUtil {
@@ -38,8 +37,8 @@
 
   private static Method TRACE = null;
   private static Method DEBUG = null;
-  private static Method INFO  = null;
-  private static Method WARN  = null;
+  private static Method INFO = null;
+  private static Method WARN = null;
   private static Method ERROR = null;
   private static Method FATAL = null;
 
@@ -47,18 +46,17 @@
     try {
       TRACE = Logger.class.getMethod("trace", new Class[] { String.class });
       DEBUG = Logger.class.getMethod("debug", new Class[] { String.class });
-      INFO  = Logger.class.getMethod("info",  new Class[] { String.class });
-      WARN  = Logger.class.getMethod("warn",  new Class[] { String.class });
+      INFO = Logger.class.getMethod("info", new Class[] { String.class });
+      WARN = Logger.class.getMethod("warn", new Class[] { String.class });
       ERROR = Logger.class.getMethod("error", new Class[] { String.class });
       FATAL = Logger.class.getMethod("error", new Class[] { String.class });
-    } catch(Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Cannot init log methods", e);
       }
     }
   }
-  
-  
+
   public static PrintStream getTraceStream(final Logger logger) {
     return getLogStream(logger, TRACE);
   }
@@ -70,7 +68,7 @@
   public static PrintStream getInfoStream(final Logger logger) {
     return getLogStream(logger, INFO);
   }
-  
+
   public static PrintStream getWarnStream(final Logger logger) {
     return getLogStream(logger, WARN);
   }
@@ -82,34 +80,35 @@
   public static PrintStream getFatalStream(final Logger logger) {
     return getLogStream(logger, FATAL);
   }
-  
+
   /** Returns a stream that, when written to, adds log lines. */
-  private static PrintStream getLogStream(final Logger logger, final Method method) {
+  private static PrintStream getLogStream(final Logger logger,
+      final Method method) {
     return new PrintStream(new ByteArrayOutputStream() {
-        private int scan = 0;
+      private int scan = 0;
 
-        private boolean hasNewline() {
-          for (; scan < count; scan++) {
-            if (buf[scan] == '\n')
-              return true;
-          }
-          return false;
+      private boolean hasNewline() {
+        for (; scan < count; scan++) {
+          if (buf[scan] == '\n')
+            return true;
         }
+        return false;
+      }
 
-        public void flush() throws IOException {
-          if (!hasNewline())
-            return;
-          try {
-            method.invoke(logger, new String[] { toString().trim() });
-          } catch (Exception e) {
-            if (LOG.isErrorEnabled()) {
-              LOG.error("Cannot log with method [" + method + "]", e);
-            }
+      public void flush() throws IOException {
+        if (!hasNewline())
+          return;
+        try {
+          method.invoke(logger, new String[] { toString().trim() });
+        } catch (Exception e) {
+          if (LOG.isErrorEnabled()) {
+            LOG.error("Cannot log with method [" + method + "]", e);
           }
-          reset();
-          scan = 0;
         }
-      }, true);
+        reset();
+        scan = 0;
+      }
+    }, true);
   }
 
 }
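The getXxxStream() helpers above wrap a Logger in a PrintStream; this is how the rest of this patch routes stack traces into the log at a chosen level. A minimal sketch:

    import org.apache.nutch.util.LogUtil;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class LogUtilExample {
      private static final Logger LOG = LoggerFactory.getLogger(LogUtilExample.class);

      public static void main(String[] args) {
        try {
          throw new IllegalStateException("boom");  // illustrative failure
        } catch (Exception e) {
          // the stack trace is written to the logger at WARN level
          e.printStackTrace(LogUtil.getWarnStream(LOG));
        }
      }
    }
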
Index: src/java/org/apache/nutch/util/EncodingDetector.java
===================================================================
--- src/java/org/apache/nutch/util/EncodingDetector.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/EncodingDetector.java	(working copy)
@@ -40,27 +40,26 @@
 
 /**
  * A simple class for detecting character encodings.
- *
+ * 
  * <p>
  * Broadly this encompasses two functions, which are distinctly separate:
- *
+ * 
  * <ol>
- *  <li>Auto detecting a set of "clues" from input text.</li>
- *  <li>Taking a set of clues and making a "best guess" as to the
- *      "real" encoding.</li>
+ * <li>Auto detecting a set of "clues" from input text.</li>
+ * <li>Taking a set of clues and making a "best guess" as to the "real"
+ * encoding.</li>
  * </ol>
  * </p>
- *
+ * 
  * <p>
- * A caller will often have some extra information about what the
- * encoding might be (e.g. from the HTTP header or HTML meta-tags, often
- * wrong but still potentially useful clues). The types of clues may differ
- * from caller to caller. Thus a typical calling sequence is:
+ * A caller will often have some extra information about what the encoding might
+ * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still
+ * potentially useful clues). The types of clues may differ from caller to
+ * caller. Thus a typical calling sequence is:
  * <ul>
- *    <li>Run step (1) to generate a set of auto-detected clues;</li>
- *    <li>Combine these clues with the caller-dependent "extra clues"
- *        available;</li>
- *    <li>Run step (2) to guess what the most probable answer is.</li>
+ * <li>Run step (1) to generate a set of auto-detected clues;</li>
+ * <li>Combine these clues with the caller-dependent "extra clues" available;</li>
+ * <li>Run step (2) to guess what the most probable answer is.</li>
  * </p>
  */
 public class EncodingDetector {
@@ -90,34 +89,32 @@
     }
 
     public String toString() {
-      return value + " (" + source +
-           ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
+      return value + " (" + source
+          + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
     }
 
     public boolean isEmpty() {
-      return (value==null || "".equals(value));
+      return (value == null || "".equals(value));
     }
 
     public boolean meetsThreshold() {
-      return (confidence < 0 ||
-               (minConfidence >= 0 && confidence >= minConfidence));
+      return (confidence < 0 || (minConfidence >= 0 && confidence >= minConfidence));
     }
   }
 
-  public static final Logger LOG = LoggerFactory.getLogger(EncodingDetector.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(EncodingDetector.class);
 
   public static final int NO_THRESHOLD = -1;
 
-  public static final String MIN_CONFIDENCE_KEY =
-    "encodingdetector.charset.min.confidence";
+  public static final String MIN_CONFIDENCE_KEY = "encodingdetector.charset.min.confidence";
 
-  private static final HashMap<String, String> ALIASES =
-    new HashMap<String, String>();
+  private static final HashMap<String, String> ALIASES = new HashMap<String, String>();
 
   private static final HashSet<String> DETECTABLES = new HashSet<String>();
 
   // CharsetDetector will die without a minimum amount of data.
-  private static final int MIN_LENGTH=4;
+  private static final int MIN_LENGTH = 4;
 
   static {
     DETECTABLES.add("text/html");
@@ -130,23 +127,22 @@
     DETECTABLES.add("application/rss+xml");
     DETECTABLES.add("application/xhtml+xml");
     /*
-     * the following map is not an alias mapping table, but
-     * maps character encodings which are often used in mislabelled
-     * documents to their correct encodings. For instance,
-     * there are a lot of documents labelled 'ISO-8859-1' which contain
-     * characters not covered by ISO-8859-1 but covered by windows-1252.
-     * Because windows-1252 is a superset of ISO-8859-1 (sharing code points
-     * for the common part), it's better to treat ISO-8859-1 as
-     * synonymous with windows-1252 than to reject, as invalid, documents
-     * labelled as ISO-8859-1 that have characters outside ISO-8859-1.
+     * the following map is not an alias mapping table, but maps character
+     * encodings which are often used in mislabelled documents to their correct
+     * encodings. For instance, there are a lot of documents labelled
+     * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but
+     * covered by windows-1252. Because windows-1252 is a superset of ISO-8859-1
+     * (sharing code points for the common part), it's better to treat
+     * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid,
+     * documents labelled as ISO-8859-1 that have characters outside ISO-8859-1.
      */
     ALIASES.put("ISO-8859-1", "windows-1252");
     ALIASES.put("EUC-KR", "x-windows-949");
     ALIASES.put("x-EUC-CN", "GB18030");
     ALIASES.put("GBK", "GB18030");
-    //ALIASES.put("Big5", "Big5HKSCS");
-    //ALIASES.put("TIS620", "Cp874");
-    //ALIASES.put("ISO-8859-11", "Cp874");
+    // ALIASES.put("Big5", "Big5HKSCS");
+    // ALIASES.put("TIS620", "Cp874");
+    // ALIASES.put("ISO-8859-11", "Cp874");
 
   }
 
@@ -190,8 +186,9 @@
     }
 
     // add character encoding coming from HTTP response header
-    addClue(parseCharacterEncoding(
-        content.getMetadata().get(Response.CONTENT_TYPE)), "header");
+    addClue(
+        parseCharacterEncoding(content.getMetadata().get(Response.CONTENT_TYPE)),
+        "header");
   }
 
   public void addClue(String value, String source, int confidence) {
@@ -210,21 +207,23 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param content Content instance
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param content
+   *          Content instance
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   public String guessEncoding(Content content, String defaultValue) {
     /*
-     * This algorithm could be replaced by something more sophisticated;
-     * ideally we would gather a bunch of data on where various clues
-     * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with
-     * the correct answer, and use machine learning/some statistical method
-     * to generate a better heuristic.
+     * This algorithm could be replaced by something more sophisticated; ideally
+     * we would gather a bunch of data on where various clues (autodetect, HTTP
+     * headers, HTML meta tags, etc.) disagree, tag each with the correct
+     * answer, and use machine learning/some statistical method to generate a
+     * better heuristic.
      */
 
     String base = content.getBaseUrl();
@@ -234,10 +233,9 @@
     }
 
     /*
-     * Go down the list of encoding "clues". Use a clue if:
-     *  1. Has a confidence value which meets our confidence threshold, OR
-     *  2. Doesn't meet the threshold, but is the best try,
-     *     since nothing else is available.
+     * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence
+     * value which meets our confidence threshold, OR 2. Doesn't meet the
+     * threshold, but is the best try, since nothing else is available.
      */
     EncodingClue defaultClue = new EncodingClue(defaultValue, "default");
     EncodingClue bestClue = defaultClue;
@@ -249,8 +247,8 @@
       String charset = clue.value;
       if (minConfidence >= 0 && clue.confidence >= minConfidence) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace(base + ": Choosing encoding: " + charset +
-                    " with confidence " + clue.confidence);
+          LOG.trace(base + ": Choosing encoding: " + charset
+              + " with confidence " + clue.confidence);
         }
         return resolveEncodingAlias(charset).toLowerCase();
       } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) {
@@ -270,10 +268,10 @@
   }
 
   /*
-   * Strictly for analysis, look for "disagreements." The top guess from
-   * each source is examined; if these meet the threshold and disagree, then
-   * we log the information -- useful for testing or generating training data
-   * for a better heuristic.
+   * Strictly for analysis, look for "disagreements." The top guess from each
+   * source is examined; if these meet the threshold and disagree, then we log
+   * the information -- useful for testing or generating training data for a
+   * better heuristic.
    */
   private void findDisagreements(String url, List<EncodingClue> newClues) {
     HashSet<String> valsSeen = new HashSet<String>();
@@ -295,9 +293,9 @@
     if (disagreement) {
       // dump all values in case of disagreement
       StringBuffer sb = new StringBuffer();
-      sb.append("Disagreement: "+url+"; ");
+      sb.append("Disagreement: " + url + "; ");
       for (int i = 0; i < newClues.size(); i++) {
-        if (i>0) {
+        if (i > 0) {
           sb.append(", ");
         }
         sb.append(newClues.get(i));
@@ -312,7 +310,7 @@
         return null;
       String canonicalName = new String(Charset.forName(encoding).name());
       return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName)
-                                                : canonicalName;
+          : canonicalName;
     } catch (Exception e) {
       LOG.warn("Invalid encoding " + encoding + " detected, using default.");
       return null;
@@ -320,14 +318,14 @@
   }
 
   /**
-   * Parse the character encoding from the specified content type header.
-   * If the content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned.
-   * <br />
-   * This method was copied from org.apache.catalina.util.RequestUtil,
-   * which is licensed under the Apache License, Version 2.0 (the "License").
-   *
-   * @param contentType a content type header
+   * Parse the character encoding from the specified content type header. If the
+   * content type is null, or there is no explicit character encoding,
+   * <code>null</code> is returned. <br />
+   * This method was copied from org.apache.catalina.util.RequestUtil, which is
+   * licensed under the Apache License, Version 2.0 (the "License").
+   * 
+   * @param contentType
+   *          a content type header
    */
   public static String parseCharacterEncoding(String contentType) {
     if (contentType == null)
@@ -341,7 +339,7 @@
       encoding = encoding.substring(0, end);
     encoding = encoding.trim();
     if ((encoding.length() > 2) && (encoding.startsWith("\""))
-      && (encoding.endsWith("\"")))
+        && (encoding.endsWith("\"")))
       encoding = encoding.substring(1, encoding.length() - 1);
     return (encoding.trim());
 
@@ -354,12 +352,12 @@
     }
 
     Configuration conf = NutchConfiguration.create();
-    EncodingDetector detector =
-      new EncodingDetector(NutchConfiguration.create());
+    EncodingDetector detector = new EncodingDetector(
+        NutchConfiguration.create());
 
     // do everything as bytes; don't want any conversion
-    BufferedInputStream istr =
-      new BufferedInputStream(new FileInputStream(args[0]));
+    BufferedInputStream istr = new BufferedInputStream(new FileInputStream(
+        args[0]));
     ByteArrayOutputStream ostr = new ByteArrayOutputStream();
     byte[] bytes = new byte[1000];
     boolean more = true;
@@ -378,8 +376,8 @@
     byte[] data = ostr.toByteArray();
 
     // make a fake Content
-    Content content =
-      new Content("", "", data, "text/html", new Metadata(), conf);
+    Content content = new Content("", "", data, "text/html", new Metadata(),
+        conf);
 
     detector.autoDetectClues(content, true);
     String encoding = detector.guessEncoding(content,
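Putting the calling sequence from the class Javadoc together (auto-detect clues, add caller-supplied clues, then guess), a minimal sketch; the Content construction mirrors the fake Content built in main() above, while the payload and URLs are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.metadata.Metadata;
    import org.apache.nutch.protocol.Content;
    import org.apache.nutch.util.EncodingDetector;
    import org.apache.nutch.util.NutchConfiguration;

    public class EncodingDetectorExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        byte[] data = "<html><body>caf\u00e9</body></html>".getBytes("windows-1252");
        Content content = new Content("http://example.com/", "http://example.com/",
            data, "text/html", new Metadata(), conf);

        EncodingDetector detector = new EncodingDetector(conf);
        detector.autoDetectClues(content, true);   // clues sniffed from the raw bytes
        detector.addClue("ISO-8859-1", "header");  // e.g. taken from the Content-Type header
        System.out.println(detector.guessEncoding(content, "windows-1252"));
      }
    }
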
Index: src/java/org/apache/nutch/util/DeflateUtils.java
===================================================================
--- src/java/org/apache/nutch/util/DeflateUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/DeflateUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on deflated data.
+ * A collection of utility methods for working on deflated data.
  */
 public class DeflateUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(DeflateUtils.class);
   private static final int EXPECTED_COMPRESSION_RATIO = 5;
   private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an inflated copy of the input array.  If the deflated 
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns an inflated copy of the input array. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in) {
     return inflateBestEffort(in, Integer.MAX_VALUE);
@@ -48,38 +47,37 @@
 
   /**
    * Returns an inflated copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the deflated input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
     // "true" because HTTP does not provide zlib headers
     Inflater inflater = new Inflater(true);
-    InflaterInputStream inStream = 
-      new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in), inflater);
 
     byte[] buf = new byte[BUF_SIZE];
     int written = 0;
     while (true) {
       try {
-	int size = inStream.read(buf);
-	if (size <= 0) 
-	  break;
-	if ((written + size) > sizeLimit) {
-	  outStream.write(buf, 0, sizeLimit - written);
-	  break;
-	}
-	outStream.write(buf, 0, size);
-	written+= size;
+        int size = inStream.read(buf);
+        if (size <= 0)
+          break;
+        if ((written + size) > sizeLimit) {
+          outStream.write(buf, 0, sizeLimit - written);
+          break;
+        }
+        outStream.write(buf, 0, size);
+        written += size;
       } catch (Exception e) {
-	LOG.info( "Caught Exception in inflateBestEffort" );
+        LOG.info("Caught Exception in inflateBestEffort");
         e.printStackTrace(LogUtil.getWarnStream(LOG));
-	break;
+        break;
       }
     }
     try {
@@ -90,23 +88,24 @@
     return outStream.toByteArray();
   }
 
-
   /**
-   * Returns an inflated copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an inflated copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] inflate(byte[] in) throws IOException {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    InflaterInputStream inStream = 
-      new InflaterInputStream ( new ByteArrayInputStream(in) );
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -119,9 +118,9 @@
    * Returns a deflated copy of the input array.
    */
   public static final byte[] deflate(byte[] in) {
-    // compress using DeflaterOutputStream 
-    ByteArrayOutputStream byteOut = 
-      new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+    // compress using DeflaterOutputStream
+    ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+        / EXPECTED_COMPRESSION_RATIO);
 
     DeflaterOutputStream outStream = new DeflaterOutputStream(byteOut);
 
Index: src/java/org/apache/nutch/util/MimeUtil.java
===================================================================
--- src/java/org/apache/nutch/util/MimeUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/MimeUtil.java	(working copy)
@@ -35,12 +35,12 @@
  * @author mattmann
  * @since NUTCH-608
  * 
- * <p>
- * This is a facade class to insulate Nutch from its underlying Mime Type
- * substrate library, <a href="http://incubator.apache.org/tika/">Apache Tika</a>.
- * Any mime handling code should be placed in this utility class, and hidden
- * from the Nutch classes that rely on it.
- * </p>
+ *        <p>
+ *        This is a facade class to insulate Nutch from its underlying Mime Type
+ *        substrate library, <a href="http://incubator.apache.org/tika/">Apache
+ *        Tika</a>. Any mime handling code should be placed in this utility
+ *        class, and hidden from the Nutch classes that rely on it.
+ *        </p>
  */
 public final class MimeUtil {
 
@@ -53,7 +53,8 @@
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class
+      .getName());
 
   public MimeUtil(Configuration conf) {
     ObjectCache objectCache = ObjectCache.get(conf);
@@ -61,25 +62,26 @@
         .getName());
     if (mimeTypez == null) {
       try {
-          String customMimeTypeFile = conf.get("mime.types.file");
-          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
-              try {
-              mimeTypez = MimeTypesFactory.create(conf
-                      .getConfResourceAsInputStream(customMimeTypeFile));
-              }
-              catch (Exception e){
-                  LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
-              }
+        String customMimeTypeFile = conf.get("mime.types.file");
+        if (customMimeTypeFile != null
+            && customMimeTypeFile.equals("") == false) {
+          try {
+            mimeTypez = MimeTypesFactory.create(conf
+                .getConfResourceAsInputStream(customMimeTypeFile));
+          } catch (Exception e) {
+            LOG.error("Can't load mime.types.file : " + customMimeTypeFile
+                + " using Tika's default");
           }
-          if (mimeTypez==null)
-              mimeTypez = MimeTypes.getDefaultMimeTypes();
+        }
+        if (mimeTypez == null)
+          mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        LOG.error("Exception in MimeUtil "+e.getMessage());
+        LOG.error("Exception in MimeUtil " + e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
     }
-    
+
     this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }
@@ -115,17 +117,17 @@
   /**
    * A facade interface to trying all the possible mime type resolution
    * strategies available within Tika. First, the mime type provided in
-   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
-   * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} is
-   * found, then that mime type is used, otherwise {@link URL} resolution is
-   * used to try and determine the mime type. If that means is unsuccessful, and
-   * if <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
-   * then mime type magic resolution is used to try and obtain a
+   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then
+   * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
+   * registry, by its cleaned name. If the {@link MimeType} is found, then that
+   * mime type is used, otherwise {@link URL} resolution is used to try and
+   * determine the mime type. If that means is unsuccessful, and if
+   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration}, then
+   * mime type magic resolution is used to try and obtain a
    * better-than-the-default approximation of the {@link MimeType}.
    * 
    * @param typeName
-   *          The original mime type, returned from a {@link ProtocolOutput}.
+   *          The original mime type, returned from a {@link ProtocolOutput}.
    * @param url
    *          The given {@link URL}, that Nutch was trying to crawl.
    * @param data
@@ -138,8 +140,7 @@
 
     try {
       cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
-          .forName(MimeUtil.cleanMimeType(typeName)).getName()
-          : null;
+          .forName(MimeUtil.cleanMimeType(typeName)).getName() : null;
     } catch (MimeTypeException mte) {
       // Seems to be a malformed mime type name...
     }
@@ -162,20 +163,25 @@
     }
 
     // if magic is enabled use mime magic to guess if the mime type returned
-    // from the magic guess is different than the one that's already set so far
-    // if it is, and it's not the default mime type, then go with the mime type
+    // from the magic guess is different than the one that's already set so
+    // far; if it is, and it's not the default mime type, then go with the
+    // mime type
     // returned by the magic
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
-      if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
-          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT)
-          && type != null && !type.getName().equals(magicType.getName())) {
-        // If magic enabled and the current mime type differs from that of the
+      if (magicType != null
+          && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
+          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT) && type != null
+          && !type.getName().equals(magicType.getName())) {
+        // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
         type = magicType;
       }
 
-      // if type is STILL null after all the resolution strategies, go for the
+      // if type is STILL null after all the resolution strategies, go for the
       // default type
       if (type == null) {
         try {
@@ -195,8 +201,8 @@
    * @param url
    *          A string representation of the document {@link URL} to sense the
    *          {@link MimeType} for.
-   * @return An appropriate {@link MimeType}, identified from the given
-   *         Document url in string form.
+   * @return An appropriate {@link MimeType}, identified from the given Document
+   *         url in string form.
    */
   public MimeType getMimeType(String url) {
     return this.mimeTypes.getMimeType(url);
@@ -208,8 +214,8 @@
    * 
    * @param name
    *          The name of a valid {@link MimeType} in the Tika mime registry.
-   * @return The object representation of the {@link MimeType}, if it exists,
-   *         or null otherwise.
+   * @return The object representation of the {@link MimeType}, if it exists, or
+   *         null otherwise.
    */
   public MimeType forName(String name) {
     try {
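
A usage sketch for the MimeUtil facade above (the URL and type names are illustrative); it only exercises the two lookups whose signatures appear in this patch, getMimeType(String) and forName(String).

import org.apache.hadoop.conf.Configuration;

import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;

import org.apache.tika.mime.MimeType;

public class MimeUtilSketch {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();
    MimeUtil mimeUtil = new MimeUtil(conf);

    // URL-based resolution through the facade
    MimeType byUrl = mimeUtil.getMimeType("http://example.com/report.pdf");
    System.out.println(byUrl != null ? byUrl.getName() : "unknown");

    // registry lookup by name; null is returned for malformed/unknown names
    MimeType byName = mimeUtil.forName("text/html");
    System.out.println(byName != null ? byName.getName() : "unknown");
  }
}
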
Index: src/java/org/apache/nutch/util/TimingUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TimingUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/TimingUtil.java	(working copy)
@@ -21,35 +21,39 @@
 
 public class TimingUtil {
 
-    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+  private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
 
-    /**
-     * Calculate the elapsed time between two times specified in milliseconds.
-     * @param start The start of the time period
-     * @param end The end of the time period
-     * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y minutes and Z seconds or null if start > end.
-     */
-    public static String elapsedTime(long start, long end){
-        if (start > end) {
-            return null;
-        }
+  /**
+   * Calculate the elapsed time between two times specified in milliseconds.
+   * 
+   * @param start
+   *          The start of the time period
+   * @param end
+   *          The end of the time period
+   * @return a zero-padded string of the form "HH:MM:SS" for the elapsed hours,
+   *         minutes and seconds, or null if start > end.
+   */
+  public static String elapsedTime(long start, long end) {
+    if (start > end) {
+      return null;
+    }
 
-        long[] elapsedTime = new long[TIME_FACTOR.length];
+    long[] elapsedTime = new long[TIME_FACTOR.length];
 
-        for (int i = 0; i < TIME_FACTOR.length; i++) {
-            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
-            start += TIME_FACTOR[i] * elapsedTime[i];
-        }
+    for (int i = 0; i < TIME_FACTOR.length; i++) {
+      elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+      start += TIME_FACTOR[i] * elapsedTime[i];
+    }
 
-        NumberFormat nf = NumberFormat.getInstance();
-        nf.setMinimumIntegerDigits(2);
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < elapsedTime.length; i++) {
-            if (i > 0) {
-                buf.append(":");
-            }
-            buf.append(nf.format(elapsedTime[i]));
-        }
-        return buf.toString();
+    NumberFormat nf = NumberFormat.getInstance();
+    nf.setMinimumIntegerDigits(2);
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < elapsedTime.length; i++) {
+      if (i > 0) {
+        buf.append(":");
+      }
+      buf.append(nf.format(elapsedTime[i]));
     }
+    return buf.toString();
+  }
 }
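
A quick sketch of TimingUtil.elapsedTime() as reformatted above: the implementation joins zero-padded hours, minutes and seconds with ':' and returns null when start > end.

import org.apache.nutch.util.TimingUtil;

public class ElapsedTimeSketch {
  public static void main(String[] args) {
    long start = System.currentTimeMillis();
    // 1 hour, 2 minutes and 3 seconds later
    long end = start + (1 * 60 * 60 * 1000) + (2 * 60 * 1000) + (3 * 1000);

    System.out.println(TimingUtil.elapsedTime(start, end)); // 01:02:03
    System.out.println(TimingUtil.elapsedTime(end, start)); // null
  }
}
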
Index: src/java/org/apache/nutch/util/LockUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LockUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/LockUtil.java	(working copy)
@@ -28,22 +28,29 @@
  * @author Andrzej Bialecki
  */
 public class LockUtil {
-  
+
   /**
    * Create a lock file.
-   * @param fs filesystem
-   * @param lockFile name of the lock file
-   * @param accept if true, and the target file exists, consider it valid. If false
-   * and the target file exists, throw an IOException.
-   * @throws IOException if accept is false, and the target file already exists,
-   * or if it's a directory.
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          name of the lock file
+   * @param accept
+   *          if true, and the target file exists, consider it valid. If false
+   *          and the target file exists, throw an IOException.
+   * @throws IOException
+   *           if accept is false, and the target file already exists, or if
+   *           it's a directory.
    */
-  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException {
+  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept)
+      throws IOException {
     if (fs.exists(lockFile)) {
-      if(!accept)
+      if (!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
       if (fs.getFileStatus(lockFile).isDir())
-        throw new IOException("lock file " + lockFile + " already exists and is a directory.");
+        throw new IOException("lock file " + lockFile
+            + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
       // make sure parents exist
@@ -55,16 +62,23 @@
   /**
    * Remove lock file. NOTE: applications enforce the semantics of this file -
    * this method simply removes any file with a given name.
-   * @param fs filesystem
-   * @param lockFile lock file name
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          lock file name
    * @return false, if the lock file doesn't exist. True, if it existed and was
-   * successfully removed.
-   * @throws IOException if lock file exists but it is a directory.
+   *         successfully removed.
+   * @throws IOException
+   *           if lock file exists but it is a directory.
    */
-  public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
-    if (!fs.exists(lockFile)) return false;
+  public static boolean removeLockFile(FileSystem fs, Path lockFile)
+      throws IOException {
+    if (!fs.exists(lockFile))
+      return false;
     if (fs.getFileStatus(lockFile).isDir())
-      throw new IOException("lock file " + lockFile + " exists but is a directory!");
+      throw new IOException("lock file " + lockFile
+          + " exists but is a directory!");
     return fs.delete(lockFile, false);
   }
 }
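
A sketch of the intended call pattern for LockUtil (the lock path is illustrative); with accept = false the call fails fast when another process already holds the lock.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.nutch.util.LockUtil;

public class LockSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path lock = new Path("crawl/crawldb/.locked");

    // throws IOException if the lock already exists (accept = false)
    LockUtil.createLockFile(fs, lock, false);
    try {
      // ... do the work guarded by the lock ...
    } finally {
      // false means the lock file was already gone
      boolean removed = LockUtil.removeLockFile(fs, lock);
      System.out.println("lock removed: " + removed);
    }
  }
}
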
Index: src/java/org/apache/nutch/util/domain/DomainStatistics.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainStatistics.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainStatistics.java	(working copy)
@@ -48,37 +48,40 @@
 import org.apache.nutch.util.URLUtil;
 
 /**
- * Extracts some very basic statistics about domains from the crawldb 
+ * Extracts some very basic statistics about domains from the crawldb
  */
-public class DomainStatistics
-extends MapReduceBase
-implements Tool, Mapper<Text, CrawlDatum, Text, LongWritable>,
-           Reducer<Text, LongWritable, LongWritable, Text> {
+public class DomainStatistics extends MapReduceBase implements Tool,
+    Mapper<Text, CrawlDatum, Text, LongWritable>,
+    Reducer<Text, LongWritable, LongWritable, Text> {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainStatistics.class);
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainStatistics.class);
+
   private static final Text FETCHED_TEXT = new Text("FETCHED");
   private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");
-  
-  public static enum MyCounter {FETCHED, NOT_FETCHED, EMPTY_RESULT};
-  
+
+  public static enum MyCounter {
+    FETCHED, NOT_FETCHED, EMPTY_RESULT
+  };
+
   private static final int MODE_HOST = 1;
   private static final int MODE_DOMAIN = 2;
   private static final int MODE_SUFFIX = 3;
-  
+
   private int mode = 0;
-  
+
   private Configuration conf;
-  
+
   public int run(String[] args) throws IOException {
     if (args.length < 3) {
-      System.out.println("usage: DomainStatistics inputDirs outDir host|domain|suffix [numOfReducer]");
+      System.out
+          .println("usage: DomainStatistics inputDirs outDir host|domain|suffix [numOfReducer]");
       return 1;
     }
     String inputDir = args[0];
     String outputDir = args[1];
     int numOfReducers = 1;
-    
+
     if (args.length > 3) {
       numOfReducers = Integer.parseInt(args[3]);
     }
@@ -91,14 +94,14 @@
     job.setJobName("Domain statistics");
 
     int mode = 0;
-    if(args[2].equals("host"))
+    if (args[2].equals("host"))
       mode = MODE_HOST;
-    else if(args[2].equals("domain"))
+    else if (args[2].equals("domain"))
       mode = MODE_DOMAIN;
-    else if(args[2].equals("suffix"))
+    else if (args[2].equals("suffix"))
       mode = MODE_SUFFIX;
     job.setInt("domain.statistics.mode", mode);
-    
+
     String[] inputDirsSpecs = inputDir.split(",");
     for (int i = 0; i < inputDirsSpecs.length; i++) {
       FileInputFormat.addInputPath(job, new Path(inputDirsSpecs[i]));
@@ -115,11 +118,12 @@
     job.setReducerClass(DomainStatistics.class);
     job.setCombinerClass(DomainStatisticsCombiner.class);
     job.setNumReduceTasks(numOfReducers);
-    
+
     JobClient.runJob(job);
-    
+
     long end = System.currentTimeMillis();
-    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
     return 0;
   }
 
@@ -128,7 +132,6 @@
     super.configure(job);
     mode = job.getInt("domain.statistics.mode", MODE_DOMAIN);
   }
-  
 
   public Configuration getConf() {
     return conf;
@@ -140,35 +143,35 @@
 
   public void map(Text urlText, CrawlDatum datum,
       OutputCollector<Text, LongWritable> output, Reporter reporter)
-  throws IOException {
-    
-    if(datum.getStatus() == CrawlDatum.STATUS_DB_FETCHED 
+      throws IOException {
+
+    if (datum.getStatus() == CrawlDatum.STATUS_DB_FETCHED
         || datum.getStatus() == CrawlDatum.STATUS_FETCH_SUCCESS) {
       try {
         URL url = new URL(urlText.toString());
         String out = null;
         switch (mode) {
-          case MODE_HOST:
-            out = url.getHost();
-            break;
-          case MODE_DOMAIN:
-            out = URLUtil.getDomainName(url);
-            break;
-          case MODE_SUFFIX:
-            out = URLUtil.getDomainSuffix(url).getDomain();
-            break;
+        case MODE_HOST:
+          out = url.getHost();
+          break;
+        case MODE_DOMAIN:
+          out = URLUtil.getDomainName(url);
+          break;
+        case MODE_SUFFIX:
+          out = URLUtil.getDomainSuffix(url).getDomain();
+          break;
         }
-        if(out.trim().equals("")) {
+        if (out.trim().equals("")) {
           LOG.info("url : " + url);
           reporter.incrCounter(MyCounter.EMPTY_RESULT, 1);
         }
-        
+
         output.collect(new Text(out), new LongWritable(1));
-      } catch (Exception ex) { }
+      } catch (Exception ex) {
+      }
       reporter.incrCounter(MyCounter.FETCHED, 1);
       output.collect(FETCHED_TEXT, new LongWritable(1));
-    }
-    else {
+    } else {
       reporter.incrCounter(MyCounter.NOT_FETCHED, 1);
       output.collect(NOT_FETCHED_TEXT, new LongWritable(1));
     }
@@ -176,31 +179,30 @@
 
   public void reduce(Text key, Iterator<LongWritable> values,
       OutputCollector<LongWritable, Text> output, Reporter reporter)
-  throws IOException {
-    
+      throws IOException {
+
     long total = 0;
-    
-    while(values.hasNext()) {
+
+    while (values.hasNext()) {
       LongWritable val = values.next();
       total += val.get();
     }
-    //invert output 
+    // invert output
     output.collect(new LongWritable(total), key);
   }
-    
-  
-  public static class DomainStatisticsCombiner extends MapReduceBase
-  implements Reducer<Text, LongWritable, Text, LongWritable> {
 
+  public static class DomainStatisticsCombiner extends MapReduceBase implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
     public void reduce(Text key, Iterator<LongWritable> values,
         OutputCollector<Text, LongWritable> output, Reporter reporter)
-    throws IOException {
+        throws IOException {
       long total = 0;
-      
-      while(values.hasNext()) {
+
+      while (values.hasNext()) {
         LongWritable val = values.next();
         total += val.get();
-      } 
+      }
       output.collect(key, new LongWritable(total));
     }
 
@@ -209,5 +211,5 @@
   public static void main(String[] args) throws Exception {
     ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(), args);
   }
-  
+
 }
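
For context, the job above is normally launched through ToolRunner with the arguments named in its usage string; a minimal driver sketch follows (the input and output paths are illustrative).

import org.apache.hadoop.util.ToolRunner;

import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.domain.DomainStatistics;

public class DomainStatisticsDriverSketch {
  public static void main(String[] args) throws Exception {
    // inputDirs outDir host|domain|suffix [numOfReducer]
    String[] jobArgs = { "crawl/crawldb/current", "stats-out", "domain", "1" };
    int exitCode = ToolRunner.run(NutchConfiguration.create(),
        new DomainStatistics(), jobArgs);
    System.exit(exitCode);
  }
}
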
Index: src/java/org/apache/nutch/util/domain/TopLevelDomain.java
===================================================================
--- src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(working copy)
@@ -18,41 +18,47 @@
 package org.apache.nutch.util.domain;
 
 /**
- * (From wikipedia) A top-level domain (TLD) is the last part of an 
- * Internet domain name; that is, the letters which follow the final 
- * dot of any domain name. For example, in the domain name 
- * <code>www.website.com</code>, the top-level domain is <code>com</code>.
+ * (From wikipedia) A top-level domain (TLD) is the last part of an Internet
+ * domain name; that is, the letters which follow the final dot of any domain
+ * name. For example, in the domain name <code>www.website.com</code>, the
+ * top-level domain is <code>com</code>.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see http://www.iana.org/
  * @see http://en.wikipedia.org/wiki/Top-level_domain
  */
 public class TopLevelDomain extends DomainSuffix {
 
-  public enum Type { INFRASTRUCTURE, GENERIC, COUNTRY };
-  
+  public enum Type {
+    INFRASTRUCTURE, GENERIC, COUNTRY
+  };
+
   private Type type;
   private String countryName = null;
-  
-  public TopLevelDomain(String domain, Type type, Status status, float boost){
+
+  public TopLevelDomain(String domain, Type type, Status status, float boost) {
     super(domain, status, boost);
     this.type = type;
   }
 
-  public TopLevelDomain(String domain, Status status, float boost, String countryName){
+  public TopLevelDomain(String domain, Status status, float boost,
+      String countryName) {
     super(domain, status, boost);
     this.type = Type.COUNTRY;
     this.countryName = countryName;
   }
-  
+
   public Type getType() {
     return type;
   }
 
-  /** Returns the country name if TLD is Country Code TLD
+  /**
+   * Returns the country name if TLD is Country Code TLD
+   * 
    * @return country name or null
-   */ 
-  public String getCountryName(){
+   */
+  public String getCountryName() {
     return countryName;
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffixes.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(working copy)
@@ -25,57 +25,62 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * Storage class for <code>DomainSuffix</code> objects 
- * Note: this class is singleton
+ * Storage class for <code>DomainSuffix</code> objects. Note: this class is a
+ * singleton.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class DomainSuffixes {
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixes.class);
-  
-  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>(); 
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixes.class);
+
+  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>();
+
   private static DomainSuffixes instance;
-  
+
   /** private ctor */
   private DomainSuffixes() {
     String file = "domain-suffixes.xml";
-    InputStream input = this.getClass().getClassLoader().getResourceAsStream(file);
+    InputStream input = this.getClass().getClassLoader()
+        .getResourceAsStream(file);
     try {
       new DomainSuffixesReader().read(this, input);
-    }
-    catch (Exception ex) {
+    } catch (Exception ex) {
       LOG.warn(StringUtils.stringifyException(ex));
     }
   }
-  
+
   /**
    * Singleton instance, lazy instantination
+   * 
    * @return
    */
   public static DomainSuffixes getInstance() {
-    if(instance == null) {
+    if (instance == null) {
       instance = new DomainSuffixes();
     }
     return instance;
   }
-  
+
   void addDomainSuffix(DomainSuffix tld) {
     domains.put(tld.getDomain(), tld);
   }
 
   /** return whether the extension is a registered domain entry */
   public boolean isDomainSuffix(String extension) {
-    return domains.containsKey(extension); 
+    return domains.containsKey(extension);
   }
-    
+
   /**
-   * Return the {@link DomainSuffix} object for the extension, if 
-   * extension is a top level domain returned object will be an 
-   * instance of {@link TopLevelDomain}
-   * @param extension of the domain
+   * Return the {@link DomainSuffix} object for the extension; if the
+   * extension is a top level domain, the returned object will be an instance
+   * of {@link TopLevelDomain}.
+   * 
+   * @param extension
+   *          of the domain
    */
   public DomainSuffix get(String extension) {
     return domains.get(extension);
   }
-  
+
 }
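
A lookup sketch for the DomainSuffixes singleton above; whether a given suffix is registered depends on the bundled domain-suffixes.xml, so the results shown in the comments are only what is typically expected.

import org.apache.nutch.util.domain.DomainSuffix;
import org.apache.nutch.util.domain.DomainSuffixes;

public class SuffixLookupSketch {
  public static void main(String[] args) {
    DomainSuffixes suffixes = DomainSuffixes.getInstance();

    // membership tests against the registry read from domain-suffixes.xml
    System.out.println(suffixes.isDomainSuffix("co.uk")); // typically true
    System.out.println(suffixes.isDomainSuffix("example")); // typically false

    // for a plain TLD the returned object is a TopLevelDomain instance
    DomainSuffix com = suffixes.get("com");
    if (com != null) {
      System.out.println(com.getDomain() + " boost=" + com.getBoost());
    }
  }
}
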
Index: src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(working copy)
@@ -36,16 +36,17 @@
 import org.xml.sax.SAXException;
 
 /**
- * For parsing xml files containing domain suffix definitions.
- * Parsed xml files should validate against 
- * <code>domain-suffixes.xsd</code>  
+ * For parsing xml files containing domain suffix definitions. Parsed xml files
+ * should validate against <code>domain-suffixes.xsd</code>
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 class DomainSuffixesReader {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixesReader.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixesReader.class);
 
-  void read(DomainSuffixes tldEntries, InputStream input) throws IOException{
+  void read(DomainSuffixes tldEntries, InputStream input) throws IOException {
     try {
 
       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
@@ -54,28 +55,29 @@
       Document document = builder.parse(new InputSource(input));
 
       Element root = document.getDocumentElement();
-      
-      if(root != null && root.getTagName().equals("domains")) {
-        
-        Element tlds = (Element)root.getElementsByTagName("tlds").item(0);
-        Element suffixes = (Element)root.getElementsByTagName("suffixes").item(0);
-        
-        //read tlds
-        readITLDs(tldEntries, (Element)tlds.getElementsByTagName("itlds").item(0));
-        readGTLDs(tldEntries, (Element)tlds.getElementsByTagName("gtlds").item(0));
-        readCCTLDs(tldEntries, (Element)tlds.getElementsByTagName("cctlds").item(0));
-        
+
+      if (root != null && root.getTagName().equals("domains")) {
+
+        Element tlds = (Element) root.getElementsByTagName("tlds").item(0);
+        Element suffixes = (Element) root.getElementsByTagName("suffixes")
+            .item(0);
+
+        // read tlds
+        readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds")
+            .item(0));
+        readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds")
+            .item(0));
+        readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds")
+            .item(0));
+
         readSuffixes(tldEntries, suffixes);
-      }
-      else {
+      } else {
         throw new IOException("xml file is not valid");
       }
-    }
-    catch (ParserConfigurationException ex) {
+    } catch (ParserConfigurationException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
-    }
-    catch (SAXException ex) {
+    } catch (SAXException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
     }
@@ -83,22 +85,24 @@
 
   void readITLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.INFRASTRUCTURE));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.INFRASTRUCTURE));
     }
   }
-    
+
   void readGTLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.GENERIC));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.GENERIC));
     }
   }
 
   void readCCTLDs(DomainSuffixes tldEntries, Element el) throws IOException {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readCCTLD((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readCCTLD((Element) children.item(i)));
     }
   }
 
@@ -113,39 +117,40 @@
     String domain = el.getAttribute("domain");
     Status status = readStatus(el);
     float boost = readBoost(el);
-    String countryName = readCountryName(el); 
-    return new TopLevelDomain(domain, status, boost, countryName);  
+    String countryName = readCountryName(el);
+    return new TopLevelDomain(domain, status, boost, countryName);
   }
-  
+
   /** read optional field status */
   Status readStatus(Element el) {
     NodeList list = el.getElementsByTagName("status");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_STATUS;
     return Status.valueOf(list.item(0).getFirstChild().getNodeValue());
   }
-  
+
   /** read optional field boost */
   float readBoost(Element el) {
     NodeList list = el.getElementsByTagName("boost");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_BOOST;
     return Float.parseFloat(list.item(0).getFirstChild().getNodeValue());
   }
-  
-  /** read field countryname 
-    */
+
+  /**
+   * read field countryname
+   */
   String readCountryName(Element el) throws IOException {
     NodeList list = el.getElementsByTagName("country");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       throw new IOException("Country name should be given");
     return list.item(0).getNodeValue();
   }
-  
+
   void readSuffixes(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("suffix");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readSuffix((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readSuffix((Element) children.item(i)));
     }
   }
 
@@ -155,5 +160,5 @@
     float boost = readBoost(el);
     return new DomainSuffix(domain, status, boost);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffix.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffix.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainSuffix.java	(working copy)
@@ -18,17 +18,18 @@
 package org.apache.nutch.util.domain;
 
 /**
- * This class represents the last part of the host name, 
- * which is operated by authoritives, not individuals. This information 
- * is needed to find the domain name of a host. The domain name of a host
- * is defined to be the last part before the domain suffix, w/o subdomain 
- * names.  As an example the domain name of <br><code> http://lucene.apache.org/ 
- * </code><br> is <code> apache.org</code>   
- * <br>
- * This class holds three fields,  
- * <strong>domain</strong> field represents the suffix (such as "co.uk")
- * <strong>boost</strong> is a float for boosting score of url's with this suffix
- * <strong>status</strong> field represents domain's status
+ * This class represents the last part of the host name, which is operated by
+ * authorities, not individuals. This information is needed to find the domain
+ * name of a host. The domain name of a host is defined to be the last part
+ * before the domain suffix, w/o subdomain names. As an example the domain name
+ * of <br>
+ * <code> http://lucene.apache.org/ 
+ * </code><br>
+ * is <code> apache.org</code> <br>
+ * This class holds three fields: the <strong>domain</strong> field represents
+ * the suffix (such as "co.uk"), <strong>boost</strong> is a float for boosting
+ * the score of URLs with this suffix, and the <strong>status</strong> field
+ * represents the domain's status.
  * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see TopLevelDomain
@@ -37,10 +38,10 @@
 public class DomainSuffix {
 
   /**
-   * Enumeration of the status of the tld. Please see domain-suffixes.xml. 
+   * Enumeration of the status of the tld. Please see domain-suffixes.xml.
    */
-  public enum Status { INFRASTRUCTURE, SPONSORED, UNSPONSORED
-    , STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
+  public enum Status {
+    INFRASTRUCTURE, SPONSORED, UNSPONSORED, STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
   };
 
   private String domain;
@@ -49,7 +50,7 @@
 
   public static final float DEFAULT_BOOST = 1.0f;
   public static final Status DEFAULT_STATUS = Status.IN_USE;
-  
+
   public DomainSuffix(String domain, Status status, float boost) {
     this.domain = domain;
     this.status = status;
@@ -59,7 +60,7 @@
   public DomainSuffix(String domain) {
     this(domain, DEFAULT_STATUS, DEFAULT_BOOST);
   }
-  
+
   public String getDomain() {
     return domain;
   }
@@ -71,7 +72,7 @@
   public float getBoost() {
     return boost;
   }
-  
+
   @Override
   public String toString() {
     return domain;
Index: src/java/org/apache/nutch/util/GenericWritableConfigurable.java
===================================================================
--- src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(working copy)
@@ -24,12 +24,15 @@
 import org.apache.hadoop.io.GenericWritable;
 import org.apache.hadoop.io.Writable;
 
-/** A generic Writable wrapper that can inject Configuration to {@link Configurable}s */ 
-public abstract class GenericWritableConfigurable extends GenericWritable 
-                                                  implements Configurable {
+/**
+ * A generic Writable wrapper that can inject Configuration to
+ * {@link Configurable}s
+ */
+public abstract class GenericWritableConfigurable extends GenericWritable
+    implements Configurable {
 
   private Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
@@ -37,7 +40,7 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     byte type = in.readByte();
@@ -50,8 +53,8 @@
     }
     Writable w = get();
     if (w instanceof Configurable)
-      ((Configurable)w).setConf(conf);
+      ((Configurable) w).setConf(conf);
     w.readFields(in);
   }
-  
+
 }
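
A hypothetical subclass sketch for GenericWritableConfigurable: as with Hadoop's GenericWritable, a subclass only declares the concrete Writable types it may wrap, and the readFields() override above then restores the wrapped value and injects the Configuration when that value is Configurable. The wrapped types and class name chosen here are illustrative.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.util.GenericWritableConfigurable;

public class ExampleWritable extends GenericWritableConfigurable {

  // the type byte written before each instance indexes into this array
  @Override
  @SuppressWarnings("unchecked")
  protected Class<? extends Writable>[] getTypes() {
    return new Class[] { Text.class, CrawlDatum.class };
  }
}
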
Index: src/java/org/apache/nutch/util/PrefixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/PrefixStringMatcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/PrefixStringMatcher.java	(working copy)
@@ -21,46 +21,47 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of prefixes.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * prefixes.
  */
 public class PrefixStringMatcher extends TrieStringMatcher {
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied array.
-   * Zero-length <code>Strings</code> are ignored.
+   * <code>String</code>s with any prefix in the supplied array. Zero-length
+   * <code>Strings</code> are ignored.
    */
   public PrefixStringMatcher(String[] prefixes) {
     super();
-    for (int i= 0; i < prefixes.length; i++)
+    for (int i = 0; i < prefixes.length; i++)
       addPatternForward(prefixes[i]);
   }
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied    
+   * <code>String</code>s with any prefix in the supplied
    * <code>Collection</code>.
-   *
-   * @throws ClassCastException if any <code>Object</code>s in the
-   * collection are not <code>String</code>s
+   * 
+   * @throws ClassCastException
+   *           if any <code>Object</code>s in the collection are not
+   *           <code>String</code>s
    */
   public PrefixStringMatcher(Collection prefixes) {
     super();
-    Iterator iter= prefixes.iterator();
+    Iterator iter = prefixes.iterator();
     while (iter.hasNext())
-      addPatternForward((String)iter.next());
+      addPatternForward((String) iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * prefix in the trie
+   * Returns true if the given <code>String</code> is matched by a prefix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -73,13 +74,13 @@
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
-        return input.substring(0, i+1);
+        return input.substring(0, i + 1);
     }
     return null;
   }
@@ -89,29 +90,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(0, i+1);
+        result = input.substring(0, i + 1);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher= 
-      new PrefixStringMatcher( 
-        new String[] 
-        {"abcd", "abc", "aac", "baz", "foo", "foobar"} );
+    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
+        "abcd", "abc", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
-                     "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                     "kite", };
+    String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/FSUtils.java
===================================================================
--- src/java/org/apache/nutch/util/FSUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/FSUtils.java	(working copy)
@@ -33,16 +33,20 @@
    * path. If removeOld is set to false then the old path will be set to the
    * name current.old.
    * 
-   * @param fs The FileSystem.
-   * @param current The end path, the one being replaced.
-   * @param replacement The path to replace with.
-   * @param removeOld True if we are removing the current path.
+   * @param fs
+   *          The FileSystem.
+   * @param current
+   *          The end path, the one being replaced.
+   * @param replacement
+   *          The path to replace with.
+   * @param removeOld
+   *          True if we are removing the current path.
    * 
-   * @throws IOException If an error occurs during replacement.
+   * @throws IOException
+   *           If an error occurs during replacement.
    */
   public static void replace(FileSystem fs, Path current, Path replacement,
-    boolean removeOld)
-    throws IOException {
+      boolean removeOld) throws IOException {
 
     // rename any current path to old
     Path old = new Path(current + ".old");
@@ -60,12 +64,14 @@
   /**
    * Closes a group of SequenceFile readers.
    * 
-   * @param readers The SequenceFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The SequenceFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
   public static void closeReaders(SequenceFile.Reader[] readers)
-    throws IOException {
-    
+      throws IOException {
+
     // loop through the readers, closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -80,12 +86,13 @@
   /**
    * Closes a group of MapFile readers.
    * 
-   * @param readers The MapFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The MapFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
-  public static void closeReaders(MapFile.Reader[] readers)
-    throws IOException {
-    
+  public static void closeReaders(MapFile.Reader[] readers) throws IOException {
+
     // loop through the readers closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
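
A sketch of FSUtils.replace() as documented above (paths illustrative): with removeOld = false the previous data is kept under the ".old" suffix instead of being deleted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.nutch.util.FSUtils;

public class ReplaceSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());

    Path current = new Path("crawl/crawldb/current");
    Path replacement = new Path("crawl/crawldb/new");

    // keeps the old data as crawl/crawldb/current.old
    FSUtils.replace(fs, current, replacement, false);
  }
}
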
Index: src/java/org/apache/nutch/util/GZIPUtils.java
===================================================================
--- src/java/org/apache/nutch/util/GZIPUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/GZIPUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on GZIPed data.
+ * A collection of utility methods for working on GZIPed data.
  */
 public class GZIPUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class);
-  private static final int EXPECTED_COMPRESSION_RATIO= 5;
-  private static final int BUF_SIZE= 4096;
+  private static final int EXPECTED_COMPRESSION_RATIO = 5;
+  private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an gunzipped copy of the input array.  If the gzipped
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns a gunzipped copy of the input array. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in) {
     return unzipBestEffort(in, Integer.MAX_VALUE);
@@ -48,33 +47,32 @@
 
   /**
    * Returns an gunzipped copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the gzipped input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) {
     try {
-      // decompress using GZIPInputStream 
-      ByteArrayOutputStream outStream = 
-        new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+      // decompress using GZIPInputStream
+      ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+          EXPECTED_COMPRESSION_RATIO * in.length);
 
-      GZIPInputStream inStream = 
-        new GZIPInputStream ( new ByteArrayInputStream(in) );
+      GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(
+          in));
 
       byte[] buf = new byte[BUF_SIZE];
       int written = 0;
       while (true) {
         try {
           int size = inStream.read(buf);
-          if (size <= 0) 
+          if (size <= 0)
             break;
           if ((written + size) > sizeLimit) {
             outStream.write(buf, 0, sizeLimit - written);
             break;
           }
           outStream.write(buf, 0, size);
-          written+= size;
+          written += size;
         } catch (Exception e) {
           break;
         }
@@ -91,23 +89,23 @@
     }
   }
 
-
   /**
+   * Returns a gunzipped copy of the input array.
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an gunzipped copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] unzip(byte[] in) throws IOException {
-    // decompress using GZIPInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using GZIPInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    GZIPInputStream inStream = 
-      new GZIPInputStream ( new ByteArrayInputStream(in) );
+    GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -121,11 +119,11 @@
    */
   public static final byte[] zip(byte[] in) {
     try {
-      // compress using GZIPOutputStream 
-      ByteArrayOutputStream byteOut= 
-        new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+      // compress using GZIPOutputStream
+      ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+          / EXPECTED_COMPRESSION_RATIO);
 
-      GZIPOutputStream outStream= new GZIPOutputStream(byteOut);
+      GZIPOutputStream outStream = new GZIPOutputStream(byteOut);
 
       try {
         outStream.write(in);
@@ -146,5 +144,5 @@
       return null;
     }
   }
-    
+
 }
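
A round-trip sketch for GZIPUtils (illustration only): zip() gzips the payload and unzipBestEffort() recovers as much as it can, so a clean round trip restores the original bytes.

import org.apache.nutch.util.GZIPUtils;

public class GzipRoundTripSketch {
  public static void main(String[] args) {
    byte[] original = "hello, gzipped world".getBytes();

    byte[] zipped = GZIPUtils.zip(original);
    byte[] unzipped = GZIPUtils.unzipBestEffort(zipped);

    System.out.println(new String(unzipped)); // hello, gzipped world
  }
}
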
Index: src/java/org/apache/nutch/util/ObjectCache.java
===================================================================
--- src/java/org/apache/nutch/util/ObjectCache.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/ObjectCache.java	(working copy)
@@ -24,35 +24,33 @@
 import org.apache.hadoop.conf.Configuration;
 
 public class ObjectCache {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class);
-  
-  private static final WeakHashMap<Configuration, ObjectCache> CACHE = 
-    new WeakHashMap<Configuration, ObjectCache>();
 
+  private static final WeakHashMap<Configuration, ObjectCache> CACHE = new WeakHashMap<Configuration, ObjectCache>();
+
   private final HashMap<String, Object> objectMap;
-  
+
   private ObjectCache() {
     objectMap = new HashMap<String, Object>();
   }
-  
+
   public static ObjectCache get(Configuration conf) {
     ObjectCache objectCache = CACHE.get(conf);
     if (objectCache == null) {
-      LOG.debug("No object cache found for conf=" + conf 
-                  + ", instantiating a new object cache");
+      LOG.debug("No object cache found for conf=" + conf
+          + ", instantiating a new object cache");
       objectCache = new ObjectCache();
       CACHE.put(conf, objectCache);
     }
     return objectCache;
   }
-  
+
   public Object getObject(String key) {
     return objectMap.get(key);
   }
-  
+
   public void setObject(String key, Object value) {
     objectMap.put(key, value);
   }
 }
-
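
A usage sketch for ObjectCache: there is one cache per Configuration instance (held in a weak map, so entries live only as long as the conf does); the key and value used here are illustrative.

import org.apache.hadoop.conf.Configuration;

import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.ObjectCache;

public class ObjectCacheSketch {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();

    ObjectCache cache = ObjectCache.get(conf);
    cache.setObject("example.key", "some expensive-to-build object");

    // the same conf always maps to the same cache, so the value is found again
    System.out.println(ObjectCache.get(conf).getObject("example.key"));
  }
}
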
Index: src/java/org/apache/nutch/util/NodeWalker.java
===================================================================
--- src/java/org/apache/nutch/util/NodeWalker.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/NodeWalker.java	(working copy)
@@ -22,13 +22,17 @@
 import org.w3c.dom.NodeList;
 
 /**
- * <p>A utility class that allows the walking of any DOM tree using a stack 
- * instead of recursion.  As the node tree is walked the next node is popped
- * off of the stack and all of its children are automatically added to the 
- * stack to be called in tree order.</p>
+ * <p>
+ * A utility class that allows the walking of any DOM tree using a stack instead
+ * of recursion. As the node tree is walked the next node is popped off of the
+ * stack and all of its children are automatically added to the stack to be
+ * called in tree order.
+ * </p>
  * 
- * <p>Currently this class is not thread safe.  It is assumed that only one
- * thread will be accessing the <code>NodeWalker</code> at any given time.</p>
+ * <p>
+ * Currently this class is not thread safe. It is assumed that only one thread
+ * will be accessing the <code>NodeWalker</code> at any given time.
+ * </p>
  */
 public class NodeWalker {
 
@@ -36,7 +40,7 @@
   private Node currentNode;
   private NodeList currentChildren;
   private Stack<Node> nodes;
-  
+
   /**
    * Starts the <code>Node</code> tree from the root node.
    * 
@@ -47,62 +51,68 @@
     nodes = new Stack<Node>();
     nodes.add(rootNode);
   }
-  
+
   /**
-   * <p>Returns the next <code>Node</code> on the stack and pushes all of its
-   * children onto the stack, allowing us to walk the node tree without the
-   * use of recursion.  If there are no more nodes on the stack then null is
-   * returned.</p>
+   * <p>
+   * Returns the next <code>Node</code> on the stack and pushes all of its
+   * children onto the stack, allowing us to walk the node tree without the use
+   * of recursion. If there are no more nodes on the stack then null is
+   * returned.
+   * </p>
    * 
-   * @return Node The next <code>Node</code> on the stack or null if there
-   * isn't a next node.
+   * @return Node The next <code>Node</code> on the stack or null if there isn't
+   *         a next node.
    */
   public Node nextNode() {
-    
+
     // if no next node return null
     if (!hasNext()) {
       return null;
     }
-    
+
     // pop the next node off of the stack and push all of its children onto
     // the stack
     currentNode = nodes.pop();
     currentChildren = currentNode.getChildNodes();
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
+
     // put the children node on the stack in first to last order
     for (int i = childLen - 1; i >= 0; i--) {
       nodes.add(currentChildren.item(i));
     }
-    
+
     return currentNode;
   }
-  
+
   /**
-   * <p>Skips over and removes from the node stack the children of the last
-   * node.  When getting a next node from the walker, that node's children 
-   * are automatically added to the stack.  You can call this method to remove
-   * those children from the stack.</p>
+   * <p>
+   * Skips over and removes from the node stack the children of the last node.
+   * When getting a next node from the walker, that node's children are
+   * automatically added to the stack. You can call this method to remove those
+   * children from the stack.
+   * </p>
    * 
-   * <p>This is useful when you don't want to process deeper into the 
-   * current path of the node tree but you want to continue processing sibling
-   * nodes.</p>
-   *
+   * <p>
+   * This is useful when you don't want to process deeper into the current path
+   * of the node tree but you want to continue processing sibling nodes.
+   * </p>
+   * 
    */
   public void skipChildren() {
-    
+
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
-    for (int i = 0 ; i < childLen ; i++) {
+
+    for (int i = 0; i < childLen; i++) {
       Node child = nodes.peek();
       if (child.equals(currentChildren.item(i))) {
         nodes.pop();
       }
     }
   }
-  
+
   /**
    * Returns true if there are more nodes on the current stack.
+   * 
    * @return
    */
   public boolean hasNext() {
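
A minimal usage sketch (not part of the patch) of the NodeWalker above: it walks a
small DOM, prints element names, and calls skipChildren() so that script subtrees
are not descended into. The demo class name, the sample markup and the import path
for NodeWalker are assumptions made only for illustration.

    import java.io.ByteArrayInputStream;
    import javax.xml.parsers.DocumentBuilderFactory;
    import org.apache.nutch.util.NodeWalker; // assumed location of the class above
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;

    public class NodeWalkerDemo {
      public static void main(String[] args) throws Exception {
        String xml = "<html><body><p>text</p><script>skipped()</script><p>more</p></body></html>";
        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
            .parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));

        NodeWalker walker = new NodeWalker(doc.getDocumentElement());
        while (walker.hasNext()) {
          Node current = walker.nextNode();
          if (current.getNodeType() != Node.ELEMENT_NODE)
            continue;
          if ("script".equalsIgnoreCase(current.getNodeName())) {
            walker.skipChildren(); // keep walking siblings, but skip this subtree
            continue;
          }
          System.out.println(current.getNodeName());
        }
      }
    }
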
Index: src/java/org/apache/nutch/util/TrieStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/TrieStringMatcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/TrieStringMatcher.java	(working copy)
@@ -17,21 +17,19 @@
 
 package org.apache.nutch.util;
 
-
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.ListIterator;
 
 /**
- * TrieStringMatcher is a base class for simple tree-based string
- * matching.
- *
+ * TrieStringMatcher is a base class for simple tree-based string matching.
+ * 
  */
 public abstract class TrieStringMatcher {
   protected TrieNode root;
 
   protected TrieStringMatcher() {
-    this.root= new TrieNode('\000', false);
+    this.root = new TrieNode('\000', false);
   }
 
   /**
@@ -44,20 +42,19 @@
     protected boolean terminal;
 
     /**
-     * Creates a new TrieNode, which contains the given
-     * <code>nodeChar</code>.  If <code>isTerminal</code> is
-     * <code>true</code>, the new node is a <em>terminal</em> node in
-     * the trie.
-     */  
+     * Creates a new TrieNode, which contains the given <code>nodeChar</code>.
+     * If <code>isTerminal</code> is <code>true</code>, the new node is a
+     * <em>terminal</em> node in the trie.
+     */
     TrieNode(char nodeChar, boolean isTerminal) {
-      this.nodeChar= nodeChar;
-      this.terminal= isTerminal;
-      this.childrenList= new LinkedList<TrieNode>();
+      this.nodeChar = nodeChar;
+      this.terminal = isTerminal;
+      this.childrenList = new LinkedList<TrieNode>();
     }
 
     /**
-     * Returns <code>true</code> if this node is a <em>terminal</em>
-     * node in the trie.
+     * Returns <code>true</code> if this node is a <em>terminal</em> node in the
+     * trie.
      */
     boolean isTerminal() {
       return terminal;
@@ -65,67 +62,68 @@
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists, one will be is
-     * added.  If <em>isTerminal</em> is <code>true</code>, the node 
-     * will be a terminal node in the trie.
+     * <code>nextChar</code>. If no such node exists, one will be added. If
+     * <em>isTerminal</em> is <code>true</code>, the node will be a terminal
+     * node in the trie.
      */
     TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) {
       if (childrenList == null) {
-        childrenList= new LinkedList<TrieNode>();
+        childrenList = new LinkedList<TrieNode>();
         childrenList.addAll(Arrays.asList(children));
-        children= null;
+        children = null;
       }
 
       if (childrenList.size() == 0) {
-        TrieNode newNode= new TrieNode(nextChar, isTerminal);
+        TrieNode newNode = new TrieNode(nextChar, isTerminal);
         childrenList.add(newNode);
         return newNode;
       }
 
-      ListIterator<TrieNode> iter= childrenList.listIterator();
-      TrieNode node= iter.next();
-      while ( (node.nodeChar < nextChar) && iter.hasNext() ) 
-        node= iter.next();
-                        
+      ListIterator<TrieNode> iter = childrenList.listIterator();
+      TrieNode node = iter.next();
+      while ((node.nodeChar < nextChar) && iter.hasNext())
+        node = iter.next();
+
       if (node.nodeChar == nextChar) {
-        node.terminal= node.terminal | isTerminal;
+        node.terminal = node.terminal | isTerminal;
         return node;
       }
 
-      if (node.nodeChar > nextChar) 
+      if (node.nodeChar > nextChar)
         iter.previous();
 
-      TrieNode newNode= new TrieNode(nextChar, isTerminal);
+      TrieNode newNode = new TrieNode(nextChar, isTerminal);
       iter.add(newNode);
-      return newNode;                   
+      return newNode;
     }
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists,
-     * <code>null</code> is returned.
+     * <code>nextChar</code>. If no such node exists, <code>null</code> is
+     * returned.
      */
     TrieNode getChild(char nextChar) {
       if (children == null) {
-        children= childrenList.toArray(new TrieNode[childrenList.size()]);
-        childrenList= null;
+        children = childrenList.toArray(new TrieNode[childrenList.size()]);
+        childrenList = null;
         Arrays.sort(children);
       }
 
-      int min= 0;
-      int max= children.length - 1;
-      int mid= 0;
+      int min = 0;
+      int max = children.length - 1;
+      int mid = 0;
       while (min < max) {
-        mid= (min + max) / 2;
-        if (children[mid].nodeChar == nextChar) 
+        mid = (min + max) / 2;
+        if (children[mid].nodeChar == nextChar)
           return children[mid];
         if (children[mid].nodeChar < nextChar)
-          min= mid + 1;
-        else // if (children[mid].nodeChar > nextChar)
-          max= mid - 1;
+          min = mid + 1;
+        else
+          // if (children[mid].nodeChar > nextChar)
+          max = mid - 1;
       }
 
-      if (min == max) 
+      if (min == max)
         if (children[min].nodeChar == nextChar)
           return children[min];
 
@@ -133,59 +131,57 @@
     }
 
     public int compareTo(TrieNode other) {
-      if (this.nodeChar < other.nodeChar) 
+      if (this.nodeChar < other.nodeChar)
         return -1;
-      if (this.nodeChar == other.nodeChar) 
+      if (this.nodeChar == other.nodeChar)
         return 0;
-//    if (this.nodeChar > other.nodeChar) 
+      // if (this.nodeChar > other.nodeChar)
       return 1;
     }
   }
 
   /**
    * Returns the next {@link TrieNode} visited, given that you are at
-   * <code>node</code>, and the the next character in the input is 
-   * the <code>idx</code>'th character of <code>s</code>.
+   * <code>node</code>, and the next character in the input is the
+   * <code>idx</code>'th character of <code>s</code>.
    */
   protected final TrieNode matchChar(TrieNode node, String s, int idx) {
     return node.getChild(s.charAt(idx));
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded and the last character is
-   * represented by a terminal node.  Zero-length <code>Strings</code>
-   * are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded and the last character is represented by a terminal node.
+   * Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternForward(String s) {
-    TrieNode node= root;
-    int stop= s.length() - 1;
+    TrieNode node = root;
+    int stop = s.length() - 1;
     int i;
     if (s.length() > 0) {
-      for (i= 0; i < stop; i++)
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(i), true);
+      for (i = 0; i < stop; i++)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(i), true);
     }
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded <em>in reverse</em> and the
-   * first character is represented by a terminal node.  Zero-length
-   * <code>Strings</code> are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded <em>in reverse</em> and the first character is represented
+   * by a terminal node. Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternBackward(String s) {
-    TrieNode node= root;
+    TrieNode node = root;
     if (s.length() > 0) {
-      for (int i= s.length()-1; i > 0; i--) 
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(0), true);
+      for (int i = s.length() - 1; i > 0; i--)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(0), true);
     }
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * pattern in the trie
+   * Returns true if the given <code>String</code> is matched by a pattern in
+   * the trie
    */
   public abstract boolean matches(String input);
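
A hedged sketch (not part of the patch) of a concrete matcher built only on the
protected helpers shown above: addPatternForward() decodes each pattern front to
back into the trie, and matchChar() follows one child per input character. The
class below is illustrative and is not one of the project's real subclasses.

    package org.apache.nutch.util; // same package, so the package-private TrieNode members are visible

    /** Illustrative only: true if any added pattern is a prefix of the input. */
    public class DemoPrefixMatcher extends TrieStringMatcher {

      public DemoPrefixMatcher(String[] patterns) {
        for (String pattern : patterns)
          addPatternForward(pattern); // build the trie from the first character onward
      }

      public boolean matches(String input) {
        TrieNode node = root;
        for (int i = 0; i < input.length(); i++) {
          node = matchChar(node, input, i); // child for input.charAt(i), or null
          if (node == null)
            return false;                   // fell off the trie: no pattern applies
          if (node.isTerminal())
            return true;                    // a complete pattern ends here
        }
        return false;
      }
    }

A suffix-oriented matcher would do the same with addPatternBackward() and walk the
input from its last character toward the first.
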
 
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(working copy)
@@ -52,42 +52,44 @@
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 
-/** 
+/**
  * Utility class for deleting duplicate documents from a solr index.
- *
+ * 
  * The algorithm goes like follows:
  * 
  * Preparation:
  * <ol>
  * <li>Query the solr server for the number of documents (say, N)</li>
- * <li>Partition N among M map tasks. For example, if we have two map tasks
- * the first map task will deal with solr documents from 0 - (N / 2 - 1) and
- * the second will deal with documents from (N / 2) to (N - 1).</li>
+ * <li>Partition N among M map tasks. For example, if we have two map tasks the
+ * first map task will deal with solr documents from 0 - (N / 2 - 1) and the
+ * second will deal with documents from (N / 2) to (N - 1).</li>
  * </ol>
  * 
  * MapReduce:
  * <ul>
- * <li>Map: Identity map where keys are digests and values are {@link SolrRecord}
- * instances(which contain id, boost and timestamp)</li>
+ * <li>Map: Identity map where keys are digests and values are
+ * {@link SolrRecord} instances (which contain id, boost and timestamp)</li>
  * <li>Reduce: After map, {@link SolrRecord}s with the same digest will be
- * grouped together. Now, of these documents with the same digests, delete
- * all of them except the one with the highest score (boost field). If two
- * (or more) documents have the same score, then the document with the latest
- * timestamp is kept. Again, every other is deleted from solr index.
- * </li>
+ * grouped together. Now, of these documents with the same digests, delete all
+ * of them except the one with the highest score (boost field). If two (or more)
+ * documents have the same score, then the document with the latest timestamp is
+ * kept. Again, every other document is deleted from the Solr index.</li>
  * </ul>
  * 
- * Note that unlike {@link DeleteDuplicates} we assume that two documents in
- * a solr index will never have the same URL. So this class only deals with
- * documents with <b>different</b> URLs but the same digest. 
+ * Note that unlike {@link DeleteDuplicates} we assume that two documents in a
+ * solr index will never have the same URL. So this class only deals with
+ * documents with <b>different</b> URLs but the same digest.
  */
 public class SolrDeleteDuplicates
-implements Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>,
-Tool {
+    implements
+    Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>,
+    Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(SolrDeleteDuplicates.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(SolrDeleteDuplicates.class);
 
-  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD + ":[* TO *]";
+  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD
+      + ":[* TO *]";
 
   private static final int NUM_MAX_DELETE_REQUEST = 1000;
 
@@ -97,12 +99,13 @@
     private long tstamp;
     private String id;
 
-    public SolrRecord() { }
-    
+    public SolrRecord() {
+    }
+
     public SolrRecord(SolrRecord old) {
-	this.id = old.id;
-	this.boost = old.boost;
-	this.tstamp = old.tstamp;
+      this.id = old.id;
+      this.boost = old.boost;
+      this.tstamp = old.tstamp;
     }
 
     public SolrRecord(String id, float boost, long tstamp) {
@@ -124,10 +127,10 @@
     }
 
     public void readSolrDocument(SolrDocument doc) {
-      id = (String)doc.getFieldValue(SolrConstants.ID_FIELD);
-      boost = (Float)doc.getFieldValue(SolrConstants.BOOST_FIELD);
+      id = (String) doc.getFieldValue(SolrConstants.ID_FIELD);
+      boost = (Float) doc.getFieldValue(SolrConstants.BOOST_FIELD);
 
-      Date buffer = (Date)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
+      Date buffer = (Date) doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
       tstamp = buffer.getTime();
     }
 
@@ -141,7 +144,7 @@
       Text.writeString(out, id);
       out.writeFloat(boost);
       out.writeLong(tstamp);
-    } 
+    }
   }
 
   public static class SolrInputSplit implements InputSplit {
@@ -149,7 +152,8 @@
     private int docBegin;
     private int numDocs;
 
-    public SolrInputSplit() { }
+    public SolrInputSplit() {
+    }
 
     public SolrInputSplit(int docBegin, int numDocs) {
       this.docBegin = docBegin;
@@ -169,7 +173,7 @@
     }
 
     public String[] getLocations() throws IOException {
-      return new String[] {} ;
+      return new String[] {};
     }
 
     public void readFields(DataInput in) throws IOException {
@@ -186,7 +190,8 @@
   public static class SolrInputFormat implements InputFormat<Text, SolrRecord> {
 
     /** Return each index as a split. */
-    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+    public InputSplit[] getSplits(JobConf job, int numSplits)
+        throws IOException {
       SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
 
       final SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
@@ -200,32 +205,31 @@
         throw new IOException(e);
       }
 
-      int numResults = (int)response.getResults().getNumFound();
-      int numDocsPerSplit = (numResults / numSplits); 
+      int numResults = (int) response.getResults().getNumFound();
+      int numDocsPerSplit = (numResults / numSplits);
       int currentDoc = 0;
       SolrInputSplit[] splits = new SolrInputSplit[numSplits];
       for (int i = 0; i < numSplits - 1; i++) {
         splits[i] = new SolrInputSplit(currentDoc, numDocsPerSplit);
         currentDoc += numDocsPerSplit;
       }
-      splits[splits.length - 1] = new SolrInputSplit(currentDoc, numResults - currentDoc);
+      splits[splits.length - 1] = new SolrInputSplit(currentDoc, numResults
+          - currentDoc);
 
       return splits;
     }
 
-    public RecordReader<Text, SolrRecord> getRecordReader(final InputSplit split,
-        final JobConf job, 
-        Reporter reporter)
+    public RecordReader<Text, SolrRecord> getRecordReader(
+        final InputSplit split, final JobConf job, Reporter reporter)
         throws IOException {
 
       SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
       SolrInputSplit solrSplit = (SolrInputSplit) split;
       final int numDocs = solrSplit.getNumDocs();
-      
+
       SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD, SolrConstants.BOOST_FIELD,
-                          SolrConstants.TIMESTAMP_FIELD,
-                          SolrConstants.DIGEST_FIELD);
+          SolrConstants.TIMESTAMP_FIELD, SolrConstants.DIGEST_FIELD);
       solrQuery.setStart(solrSplit.getDocBegin());
       solrQuery.setRows(numDocs);
 
@@ -242,7 +246,8 @@
 
         private int currentDoc = 0;
 
-        public void close() throws IOException { }
+        public void close() throws IOException {
+        }
 
         public Text createKey() {
           return new Text();
@@ -266,13 +271,14 @@
           }
 
           SolrDocument doc = solrDocs.get(currentDoc);
-          String digest = (String) doc.getFieldValue(SolrConstants.DIGEST_FIELD);
+          String digest = (String) doc
+              .getFieldValue(SolrConstants.DIGEST_FIELD);
           key.set(digest);
           value.readSolrDocument(doc);
 
           currentDoc++;
           return true;
-        }    
+        }
       };
     }
   }
@@ -304,7 +310,6 @@
     }
   }
 
-
   public void close() throws IOException {
     try {
       if (numDeletes > 0) {
@@ -322,13 +327,13 @@
 
   public void reduce(Text key, Iterator<SolrRecord> values,
       OutputCollector<Text, SolrRecord> output, Reporter reporter)
-  throws IOException {
+      throws IOException {
     SolrRecord recordToKeep = new SolrRecord(values.next());
     while (values.hasNext()) {
       SolrRecord solrRecord = values.next();
-      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
-          (solrRecord.getBoost() == recordToKeep.getBoost() && 
-              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
+      if (solrRecord.getBoost() > recordToKeep.getBoost()
+          || (solrRecord.getBoost() == recordToKeep.getBoost() && solrRecord
+              .getTstamp() > recordToKeep.getTstamp())) {
         updateRequest.deleteById(recordToKeep.id);
         recordToKeep = new SolrRecord(solrRecord);
       } else {
@@ -338,7 +343,8 @@
       reporter.incrCounter("SolrDedupStatus", "Deleted documents", 1);
       if (numDeletes >= NUM_MAX_DELETE_REQUEST) {
         try {
-          LOG.info("SolrDeleteDuplicates: deleting " + numDeletes + " duplicates");
+          LOG.info("SolrDeleteDuplicates: deleting " + numDeletes
+              + " duplicates");
           updateRequest.process(solr);
         } catch (SolrServerException e) {
           throw new IOException(e);
@@ -358,7 +364,7 @@
     long start = System.currentTimeMillis();
     LOG.info("SolrDeleteDuplicates: starting at " + sdf.format(start));
     LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
-    
+
     JobConf job = new NutchJob(getConf());
 
     job.set(SolrConstants.SERVER_URL, solrUrl);
@@ -373,7 +379,8 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("SolrDeleteDuplicates: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("SolrDeleteDuplicates: finished at " + sdf.format(end)
+        + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public int run(String[] args) throws IOException {
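
To see the reduce-side rule in isolation, here is a hedged restatement (not part of
the patch) of the keep/delete decision described in the class Javadoc above: higher
boost wins, and on equal boost the later timestamp wins. The helper class and method
names are illustrative; SolrRecord and its getters are the ones shown in this file.

    import org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord;

    /** Illustrative only: the duplicate-resolution rule from the Javadoc above. */
    class DedupRule {
      /** True if candidate should replace the record kept so far. */
      static boolean shouldReplaceKept(SolrRecord kept, SolrRecord candidate) {
        if (candidate.getBoost() != kept.getBoost())
          return candidate.getBoost() > kept.getBoost(); // higher boost wins
        return candidate.getTstamp() > kept.getTstamp(); // tie: later timestamp wins
      }
    }
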
Index: src/java/org/apache/nutch/indexer/solr/SolrUtils.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(working copy)
@@ -31,8 +31,9 @@
 
   public static Logger LOG = LoggerFactory.getLogger(SolrIndexer.class);
 
-  public static CommonsHttpSolrServer getCommonsHttpSolrServer(JobConf job) throws MalformedURLException {
-    HttpClient client=new HttpClient();
+  public static CommonsHttpSolrServer getCommonsHttpSolrServer(JobConf job)
+      throws MalformedURLException {
+    HttpClient client = new HttpClient();
 
     // Check for username/password
     if (job.getBoolean(SolrConstants.USE_AUTH, false)) {
@@ -40,9 +41,13 @@
 
       LOG.info("Authenticating as: " + username);
 
-      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT,
+          AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
 
-      client.getState().setCredentials(scope, new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+      client.getState().setCredentials(
+          scope,
+          new UsernamePasswordCredentials(username, job
+              .get(SolrConstants.PASSWORD)));
 
       HttpClientParams params = client.getParams();
       params.setAuthenticationPreemptive(true);
@@ -60,9 +65,12 @@
     for (int i = 0; i < input.length(); i++) {
       ch = input.charAt(i);
 
-      // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
-      // and non-printable control characters except tabulator, new line and carriage return
-      if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
+      // Strip all non-characters
+      // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
+      // and non-printable control characters except tabulator, new line
+      // and carriage return
+      if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step
+          // 0x10000
           ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
           (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
           (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
Index: src/java/org/apache/nutch/indexer/solr/SolrClean.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrClean.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrClean.java	(working copy)
@@ -48,13 +48,13 @@
 import org.apache.solr.client.solrj.request.UpdateRequest;
 
 /**
-* The class scans CrawlDB looking for entries with status DB_GONE (404) and sends delete requests to Solr
-* for those documents.
-* 
-* 
-* @author Claudio Martella
-*
-*/
+ * This class scans the CrawlDB looking for entries with status DB_GONE (404) and
+ * sends delete requests to Solr for those documents.
+ * 
+ * 
+ * @author Claudio Martella
+ * 
+ */
 
 public class SolrClean implements Tool {
   public static final Logger LOG = LoggerFactory.getLogger(SolrClean.class);
@@ -70,14 +70,17 @@
     this.conf = conf;
   }
 
-  public static class DBFilter implements Mapper<Text, CrawlDatum, ByteWritable, Text> {
+  public static class DBFilter implements
+      Mapper<Text, CrawlDatum, ByteWritable, Text> {
     private ByteWritable OUT = new ByteWritable(CrawlDatum.STATUS_DB_GONE);
 
     @Override
-    public void configure(JobConf arg0) { }
+    public void configure(JobConf arg0) {
+    }
 
     @Override
-    public void close() throws IOException { }
+    public void close() throws IOException {
+    }
 
     @Override
     public void map(Text key, CrawlDatum value,
@@ -90,7 +93,8 @@
     }
   }
 
-  public static class SolrDeleter implements Reducer<ByteWritable, Text, Text, ByteWritable> {
+  public static class SolrDeleter implements
+      Reducer<ByteWritable, Text, Text, ByteWritable> {
     private static final int NUM_MAX_DELETE_REQUEST = 1000;
     private int numDeletes = 0;
     private int totalDeleted = 0;
@@ -130,7 +134,7 @@
     @Override
     public void reduce(ByteWritable key, Iterator<Text> values,
         OutputCollector<Text, ByteWritable> output, Reporter reporter)
-    throws IOException {
+        throws IOException {
       while (values.hasNext()) {
         Text document = values.next();
         updateRequest.deleteById(document.toString());
@@ -151,7 +155,8 @@
     }
   }
 
-  public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
+  public void delete(String crawldb, String solrUrl, boolean noCommit)
+      throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("SolrClean: starting at " + sdf.format(start));
@@ -171,7 +176,8 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public int run(String[] args) throws IOException {
@@ -191,8 +197,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    int result = ToolRunner.run(NutchConfiguration.create(),
-        new SolrClean(), args);
+    int result = ToolRunner.run(NutchConfiguration.create(), new SolrClean(),
+        args);
     System.exit(result);
   }
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(working copy)
@@ -30,15 +30,15 @@
   public static final String USERNAME = SOLR_PREFIX + "auth.username";
 
   public static final String PASSWORD = SOLR_PREFIX + "auth.password";
-  
+
   public static final String ID_FIELD = "id";
-  
+
   public static final String URL_FIELD = "url";
-  
+
   public static final String BOOST_FIELD = "boost";
-  
+
   public static final String TIMESTAMP_FIELD = "tstamp";
-  
+
   public static final String DIGEST_FIELD = "digest";
 
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrWriter.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrWriter.java	(working copy)
@@ -40,8 +40,7 @@
   private SolrServer solr;
   private SolrMappingReader solrMapping;
 
-  private final List<SolrInputDocument> inputDocs =
-    new ArrayList<SolrInputDocument>();
+  private final List<SolrInputDocument> inputDocs = new ArrayList<SolrInputDocument>();
 
   private int commitSize;
 
@@ -53,23 +52,24 @@
 
   public void write(NutchDocument doc) throws IOException {
     final SolrInputDocument inputDoc = new SolrInputDocument();
-    for(final Entry<String, NutchField> e : doc) {
+    for (final Entry<String, NutchField> e : doc) {
       for (final Object val : e.getValue().getValues()) {
         // normalise the string representation for a Date
         Object val2 = val;
 
-        if (val instanceof Date){
+        if (val instanceof Date) {
           val2 = DateUtil.getThreadLocalDateFormat().format(val);
         }
 
         if (e.getKey().equals("content")) {
-          val2 = SolrUtils.stripNonCharCodepoints((String)val);
+          val2 = SolrUtils.stripNonCharCodepoints((String) val);
         }
 
-        inputDoc.addField(solrMapping.mapKey(e.getKey()), val2, e.getValue().getWeight());
+        inputDoc.addField(solrMapping.mapKey(e.getKey()), val2, e.getValue()
+            .getWeight());
         String sCopy = solrMapping.mapCopyKey(e.getKey());
         if (sCopy != e.getKey()) {
-        	inputDoc.addField(sCopy, val);	
+          inputDoc.addField(sCopy, val);
         }
       }
     }
Index: src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrIndexer.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrIndexer.java	(working copy)
@@ -57,7 +57,7 @@
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
       List<Path> segments) throws IOException {
-      indexSolr(solrUrl, crawlDb, linkDb, segments, false);
+    indexSolr(solrUrl, crawlDb, linkDb, segments, false);
   }
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
@@ -76,22 +76,22 @@
 
     job.setReduceSpeculativeExecution(false);
 
-    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
-                         new Random().nextInt());
+    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-"
+        + new Random().nextInt());
 
     FileOutputFormat.setOutputPath(job, tmp);
     try {
       JobClient.runJob(job);
       // do the commits once and for all the reducers in one go
-      SolrServer solr =  SolrUtils.getCommonsHttpSolrServer(job);
+      SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
 
       if (!noCommit) {
         solr.commit();
       }
       long end = System.currentTimeMillis();
-      LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
-    }
-    catch (Exception e){
+      LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
+          + TimingUtil.elapsedTime(start, end));
+    } catch (Exception e) {
       LOG.error(e.toString());
     } finally {
       FileSystem.get(job).delete(tmp, true);
@@ -100,7 +100,8 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 3) {
-      System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
+      System.err
+          .println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
       return -1;
     }
 
@@ -112,14 +113,13 @@
     boolean noCommit = false;
 
     for (int i = 2; i < args.length; i++) {
-    	if (args[i].equals("-linkdb")) {
-    		linkDb = new Path(args[++i]);
-    	}
-    	else if (args[i].equals("-dir")) {
+      if (args[i].equals("-linkdb")) {
+        linkDb = new Path(args[++i]);
+      } else if (args[i].equals("-dir")) {
         Path dir = new Path(args[++i]);
         FileSystem fs = dir.getFileSystem(getConf());
         FileStatus[] fstats = fs.listStatus(dir,
-                HadoopFSUtil.getPassDirectoriesFilter(fs));
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         Path[] files = HadoopFSUtil.getPaths(fstats);
         for (Path p : files) {
           segments.add(p);
@@ -141,7 +141,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    final int res = ToolRunner.run(NutchConfiguration.create(), new SolrIndexer(), args);
+    final int res = ToolRunner.run(NutchConfiguration.create(),
+        new SolrIndexer(), args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java	(working copy)
@@ -38,16 +38,17 @@
 
 public class SolrMappingReader {
   public static Logger LOG = LoggerFactory.getLogger(SolrMappingReader.class);
-  
+
   private Configuration conf;
-  
+
   private Map<String, String> keyMap = new HashMap<String, String>();
   private Map<String, String> copyMap = new HashMap<String, String>();
   private String uniqueKey = "id";
-  
+
   public static synchronized SolrMappingReader getInstance(Configuration conf) {
     ObjectCache cache = ObjectCache.get(conf);
-    SolrMappingReader instance = (SolrMappingReader)cache.getObject(SolrMappingReader.class.getName());
+    SolrMappingReader instance = (SolrMappingReader) cache
+        .getObject(SolrMappingReader.class.getName());
     if (instance == null) {
       instance = new SolrMappingReader(conf);
       cache.setObject(SolrMappingReader.class.getName(), instance);
@@ -60,9 +61,10 @@
     parseMapping();
   }
 
-  private void parseMapping() {    
+  private void parseMapping() {
     InputStream ssInputStream = null;
-    ssInputStream = conf.getConfResourceAsInputStream(conf.get(SolrConstants.MAPPING_FILE, "solrindex-mapping.xml"));
+    ssInputStream = conf.getConfResourceAsInputStream(conf.get(
+        SolrConstants.MAPPING_FILE, "solrindex-mapping.xml"));
 
     InputSource inputSource = new InputSource(ssInputStream);
     try {
@@ -74,48 +76,50 @@
       if (fieldList.getLength() > 0) {
         for (int i = 0; i < fieldList.getLength(); i++) {
           Element element = (Element) fieldList.item(i);
-          LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
-          keyMap.put(element.getAttribute("source"), element.getAttribute("dest"));
+          LOG.info("source: " + element.getAttribute("source") + " dest: "
+              + element.getAttribute("dest"));
+          keyMap.put(element.getAttribute("source"),
+              element.getAttribute("dest"));
         }
       }
       NodeList copyFieldList = rootElement.getElementsByTagName("copyField");
       if (copyFieldList.getLength() > 0) {
         for (int i = 0; i < copyFieldList.getLength(); i++) {
           Element element = (Element) copyFieldList.item(i);
-          LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
-          copyMap.put(element.getAttribute("source"), element.getAttribute("dest"));
+          LOG.info("source: " + element.getAttribute("source") + " dest: "
+              + element.getAttribute("dest"));
+          copyMap.put(element.getAttribute("source"),
+              element.getAttribute("dest"));
         }
       }
       NodeList uniqueKeyItem = rootElement.getElementsByTagName("uniqueKey");
       if (uniqueKeyItem.getLength() > 1) {
         LOG.warn("More than one unique key definitions found in solr index mapping, using default 'id'");
         uniqueKey = "id";
-      }
-      else if (uniqueKeyItem.getLength() == 0) {
+      } else if (uniqueKeyItem.getLength() == 0) {
         LOG.warn("No unique key definition found in solr index mapping using, default 'id'");
+      } else {
+        uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
       }
-      else{
-    	  uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
-      }
     } catch (MalformedURLException e) {
-        LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (SAXException e) {
-        LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (IOException e) {
-    	LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (ParserConfigurationException e) {
-    	LOG.warn(e.toString());
-    } 
+      LOG.warn(e.toString());
+    }
   }
-	  
+
   public Map<String, String> getKeyMap() {
     return keyMap;
   }
-	  
+
   public Map<String, String> getCopyMap() {
     return copyMap;
   }
-	  
+
   public String getUniqueKey() {
     return uniqueKey;
   }
@@ -128,14 +132,14 @@
   }
 
   public String mapKey(String key) throws IOException {
-    if(keyMap.containsKey(key)) {
+    if (keyMap.containsKey(key)) {
       key = (String) keyMap.get(key);
     }
     return key;
   }
 
   public String mapCopyKey(String key) throws IOException {
-    if(copyMap.containsKey(key)) {
+    if (copyMap.containsKey(key)) {
       key = (String) copyMap.get(key);
     }
     return key;
Index: src/java/org/apache/nutch/indexer/NutchDocument.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchDocument.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/NutchDocument.java	(working copy)
@@ -31,12 +31,12 @@
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.nutch.metadata.Metadata;
 
-/** A {@link NutchDocument} is the unit of indexing.*/
-public class NutchDocument
-implements Writable, Iterable<Entry<String, NutchField>> {
+/** A {@link NutchDocument} is the unit of indexing. */
+public class NutchDocument implements Writable,
+    Iterable<Entry<String, NutchField>> {
 
   public static final byte VERSION = 2;
-  
+
   private Map<String, NutchField> fields;
 
   private Metadata documentMeta;
Index: src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(working copy)
@@ -24,66 +24,69 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * Reads and parses a URL and run the indexers on it. Displays the fields obtained and the first
- * 100 characters of their value
+ * Reads and parses a URL and runs the indexers on it. Displays the fields
+ * obtained and the first 100 characters of their value.
  * 
- * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker http://www.lemonde.fr
+ * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker
+ * http://www.lemonde.fr
+ * 
  * @author Julien Nioche
  **/
 
 public class IndexingFiltersChecker extends Configured implements Tool {
-  
-  public static final Logger LOG = LoggerFactory.getLogger(IndexingFiltersChecker.class);
-  
+
+  public static final Logger LOG = LoggerFactory
+      .getLogger(IndexingFiltersChecker.class);
+
   public IndexingFiltersChecker() {
 
   }
-  
+
   public int run(String[] args) throws Exception {
-    
+
     String contentType = null;
     String url = null;
-    
+
     String usage = "Usage: IndexingFiltersChecker <url>";
-    
+
     if (args.length != 1) {
       System.err.println(usage);
       System.exit(-1);
     }
-    
+
     url = args[0];
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("fetching: " + url);
     }
-        
+
     IndexingFilters indexers = new IndexingFilters(conf);
-    
+
     ProtocolFactory factory = new ProtocolFactory(conf);
     Protocol protocol = factory.getProtocol(url);
     CrawlDatum datum = new CrawlDatum();
-    
+
     Content content = protocol.getProtocolOutput(new Text(url), datum)
         .getContent();
-    
+
     if (content == null) {
       System.out.println("No content for " + url);
       return 0;
     }
-    
+
     contentType = content.getContentType();
-    
+
     if (contentType == null) {
       return -1;
     }
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("parsing: " + url);
       LOG.info("contentType: " + contentType);
     }
-    
+
     ParseResult parseResult = new ParseUtil(conf).parse(content);
-    
+
     NutchDocument doc = new NutchDocument();
     Text urlText = new Text(url);
 
@@ -107,19 +110,19 @@
     }
     return 0;
   }
-  
+
   public static void main(String[] args) throws Exception {
     final int res = ToolRunner.run(NutchConfiguration.create(),
         new IndexingFiltersChecker(), args);
     System.exit(res);
   }
-  
+
   Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
-  
+
   @Override
   public void setConf(Configuration arg0) {
     conf = arg0;
Index: src/java/org/apache/nutch/indexer/IndexerMapReduce.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerMapReduce.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexerMapReduce.java	(working copy)
@@ -48,11 +48,12 @@
 import org.apache.nutch.scoring.ScoringFilterException;
 import org.apache.nutch.scoring.ScoringFilters;
 
-public class IndexerMapReduce extends Configured
-implements Mapper<Text, Writable, Text, NutchWritable>,
-          Reducer<Text, NutchWritable, Text, NutchDocument> {
+public class IndexerMapReduce extends Configured implements
+    Mapper<Text, Writable, Text, NutchWritable>,
+    Reducer<Text, NutchWritable, Text, NutchDocument> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(IndexerMapReduce.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(IndexerMapReduce.class);
 
   private IndexingFilters filters;
   private ScoringFilters scfilters;
@@ -64,13 +65,14 @@
   }
 
   public void map(Text key, Writable value,
-      OutputCollector<Text, NutchWritable> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
     output.collect(key, new NutchWritable(value));
   }
 
   public void reduce(Text key, Iterator<NutchWritable> values,
-                     OutputCollector<Text, NutchDocument> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, NutchDocument> output, Reporter reporter)
+      throws IOException {
     Inlinks inlinks = null;
     CrawlDatum dbDatum = null;
     CrawlDatum fetchDatum = null;
@@ -79,38 +81,38 @@
     while (values.hasNext()) {
       final Writable value = values.next().get(); // unwrap
       if (value instanceof Inlinks) {
-        inlinks = (Inlinks)value;
+        inlinks = (Inlinks) value;
       } else if (value instanceof CrawlDatum) {
-        final CrawlDatum datum = (CrawlDatum)value;
+        final CrawlDatum datum = (CrawlDatum) value;
         if (CrawlDatum.hasDbStatus(datum))
           dbDatum = datum;
         else if (CrawlDatum.hasFetchStatus(datum)) {
           // don't index unmodified (empty) pages
           if (datum.getStatus() != CrawlDatum.STATUS_FETCH_NOTMODIFIED)
             fetchDatum = datum;
-        } else if (CrawlDatum.STATUS_LINKED == datum.getStatus() ||
-                   CrawlDatum.STATUS_SIGNATURE == datum.getStatus() ||
-                   CrawlDatum.STATUS_PARSE_META == datum.getStatus()) {
+        } else if (CrawlDatum.STATUS_LINKED == datum.getStatus()
+            || CrawlDatum.STATUS_SIGNATURE == datum.getStatus()
+            || CrawlDatum.STATUS_PARSE_META == datum.getStatus()) {
           continue;
         } else {
-          throw new RuntimeException("Unexpected status: "+datum.getStatus());
+          throw new RuntimeException("Unexpected status: " + datum.getStatus());
         }
       } else if (value instanceof ParseData) {
-        parseData = (ParseData)value;
+        parseData = (ParseData) value;
       } else if (value instanceof ParseText) {
-        parseText = (ParseText)value;
+        parseText = (ParseText) value;
       } else if (LOG.isWarnEnabled()) {
-        LOG.warn("Unrecognized type: "+value.getClass());
+        LOG.warn("Unrecognized type: " + value.getClass());
       }
     }
 
-    if (fetchDatum == null || dbDatum == null
-        || parseText == null || parseData == null) {
-      return;                                     // only have inlinks
+    if (fetchDatum == null || dbDatum == null || parseText == null
+        || parseData == null) {
+      return; // only have inlinks
     }
 
-    if (!parseData.getStatus().isSuccess() ||
-        fetchDatum.getStatus() != CrawlDatum.STATUS_FETCH_SUCCESS) {
+    if (!parseData.getStatus().isSuccess()
+        || fetchDatum.getStatus() != CrawlDatum.STATUS_FETCH_SUCCESS) {
       return;
     }
 
@@ -127,14 +129,17 @@
     try {
       // extract information from dbDatum and pass it to
       // fetchDatum so that indexing filters can use it
-      final Text url = (Text) dbDatum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
+      final Text url = (Text) dbDatum.getMetaData().get(
+          Nutch.WRITABLE_REPR_URL_KEY);
       if (url != null) {
         fetchDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY, url);
       }
       // run indexing filters
       doc = this.filters.filter(doc, parse, key, fetchDatum, inlinks);
     } catch (final IndexingException e) {
-      if (LOG.isWarnEnabled()) { LOG.warn("Error indexing "+key+": "+e); }
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("Error indexing " + key + ": " + e);
+      }
       reporter.incrCounter("IndexerStatus", "Errors", 1);
       return;
     }
@@ -148,8 +153,8 @@
     float boost = 1.0f;
     // run scoring filters
     try {
-      boost = this.scfilters.indexerScore(key, doc, dbDatum,
-              fetchDatum, parse, inlinks, boost);
+      boost = this.scfilters.indexerScore(key, doc, dbDatum, fetchDatum, parse,
+          inlinks, boost);
     } catch (final ScoringFilterException e) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("Error calculating score " + key + ": " + e);
@@ -166,30 +171,32 @@
     output.collect(key, doc);
   }
 
-  public void close() throws IOException { }
+  public void close() throws IOException {
+  }
 
   public static void initMRJob(Path crawlDb, Path linkDb,
-                           Collection<Path> segments,
-                           JobConf job) {
+      Collection<Path> segments, JobConf job) {
 
     LOG.info("IndexerMapReduce: crawldb: " + crawlDb);
-    
-    if (linkDb!=null)
+
+    if (linkDb != null)
       LOG.info("IndexerMapReduce: linkdb: " + linkDb);
 
     for (final Path segment : segments) {
       LOG.info("IndexerMapReduces: adding segment: " + segment);
-      FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
-      FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.FETCH_DIR_NAME));
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.PARSE_DIR_NAME));
       FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
       FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
     }
 
     FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
-    
-    if (linkDb!=null)
-	  FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
-    
+
+    if (linkDb != null)
+      FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
+
     job.setInputFormat(SequenceFileInputFormat.class);
 
     job.setMapperClass(IndexerMapReduce.class);
Index: src/java/org/apache/nutch/indexer/NutchField.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchField.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/NutchField.java	(working copy)
@@ -27,34 +27,34 @@
 import org.apache.hadoop.io.Writable;
 
 /**
- * This class represents a multi-valued field with a weight. Values are arbitrary
- * objects.
+ * This class represents a multi-valued field with a weight. Values are
+ * arbitrary objects.
  */
 public class NutchField implements Writable {
   private float weight;
   private List<Object> values = new ArrayList<Object>();
-  
+
   public NutchField() {
-    
+
   }
-  
+
   public NutchField(Object value) {
     this(value, 1.0f);
   }
-  
+
   public NutchField(Object value, float weight) {
     this.weight = weight;
     if (value instanceof Collection) {
-      values.addAll((Collection<Object>)value);
+      values.addAll((Collection<Object>) value);
     } else {
       values.add(value);
     }
   }
-  
+
   public void add(Object value) {
     values.add(value);
   }
-  
+
   public float getWeight() {
     return weight;
   }
@@ -66,7 +66,7 @@
   public List<Object> getValues() {
     return values;
   }
-  
+
   public void reset() {
     weight = 1.0f;
     values.clear();
Index: src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(working copy)
@@ -31,13 +31,13 @@
   @Override
   public RecordWriter<Text, NutchDocument> getRecordWriter(FileSystem ignored,
       JobConf job, String name, Progressable progress) throws IOException {
-    
+
     // populate JobConf with field indexing options
     IndexingFilters filters = new IndexingFilters(job);
-    
-    final NutchIndexWriter[] writers =
-      NutchIndexWriterFactory.getNutchIndexWriters(job);
 
+    final NutchIndexWriter[] writers = NutchIndexWriterFactory
+        .getNutchIndexWriters(job);
+
     for (final NutchIndexWriter writer : writers) {
       writer.open(job, name);
     }
Index: src/java/org/apache/nutch/indexer/IndexingFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexingFilter.java	(working copy)
@@ -28,9 +28,9 @@
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.plugin.Pluggable;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexingFilter extends Pluggable, Configurable {
@@ -39,17 +39,23 @@
 
   /**
    * Adds fields or otherwise modifies the document that will be indexed for a
-   * parse. Unwanted documents can be removed from indexing by returning a null value.
+   * parse. Unwanted documents can be removed from indexing by returning a null
+   * value.
    * 
-   * @param doc document instance for collecting fields
-   * @param parse parse data instance
-   * @param url page url
-   * @param datum crawl datum for the page
-   * @param inlinks page inlinks
-   * @return modified (or a new) document instance, or null (meaning the document
-   * should be discarded)
+   * @param doc
+   *          document instance for collecting fields
+   * @param parse
+   *          parse data instance
+   * @param url
+   *          page url
+   * @param datum
+   *          crawl datum for the page
+   * @param inlinks
+   *          page inlinks
+   * @return modified (or a new) document instance, or null (meaning the
+   *         document should be discarded)
    * @throws IndexingException
    */
-  NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
-    throws IndexingException;
+  NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException;
 }
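
A hedged example (not part of the patch) of the extension point described above: a
minimal filter that adds a single field and returns null to discard documents it
cannot handle. Only the filter method and the Configurable plumbing are shown; the
class name and the "host" field are illustrative, and a real plugin also needs a
plugin descriptor and, depending on the Nutch version, possibly further interface
methods.

    import java.net.MalformedURLException;
    import java.net.URL;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.Text;
    import org.apache.nutch.crawl.CrawlDatum;
    import org.apache.nutch.crawl.Inlinks;
    import org.apache.nutch.indexer.IndexingException;
    import org.apache.nutch.indexer.IndexingFilter;
    import org.apache.nutch.indexer.NutchDocument;
    import org.apache.nutch.parse.Parse;

    public class HostFilterSketch implements IndexingFilter {

      private Configuration conf;

      public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
          CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        try {
          doc.add("host", new URL(url.toString()).getHost()); // one extra indexed field
        } catch (MalformedURLException e) {
          return null; // discard documents whose URL cannot be parsed
        }
        return doc;
      }

      public void setConf(Configuration conf) {
        this.conf = conf;
      }

      public Configuration getConf() {
        return conf;
      }
    }
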
Index: src/java/org/apache/nutch/indexer/IndexingFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexingFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.hadoop.io.Text;
 
-/** Creates and caches {@link IndexingFilter} implementing plugins.*/
+/** Creates and caches {@link IndexingFilter} implementing plugins. */
 public class IndexingFilters {
 
   public static final String INDEXINGFILTER_ORDER = "indexingfilter.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexingFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexingFilters.class);
 
   private IndexingFilter[] indexingFilters;
 
@@ -62,8 +63,7 @@
         if (point == null)
           throw new RuntimeException(IndexingFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexingFilter> filterMap =
-          new HashMap<String, IndexingFilter>();
+        HashMap<String, IndexingFilter> filterMap = new HashMap<String, IndexingFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexingFilter filter = (IndexingFilter) extension
@@ -78,21 +78,19 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexingFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexingFilter[0]));
+          objectCache.setObject(IndexingFilter.class.getName(), filterMap
+              .values().toArray(new IndexingFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexingFilter> filters = new ArrayList<IndexingFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-            IndexingFilter filter = filterMap
-                .get(orderedFilters[i]);
+            IndexingFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexingFilter.class.getName(), filters
-              .toArray(new IndexingFilter[filters.size()]));
+          objectCache.setObject(IndexingFilter.class.getName(),
+              filters.toArray(new IndexingFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,15 +98,16 @@
       this.indexingFilters = (IndexingFilter[]) objectCache
           .getObject(IndexingFilter.class.getName());
     }
-  }                  
+  }
 
   /** Run all defined filters. */
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum,
-      Inlinks inlinks) throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
     for (int i = 0; i < this.indexingFilters.length; i++) {
       doc = this.indexingFilters[i].filter(doc, parse, url, datum, inlinks);
       // break the loop if an indexing filter discards the doc
-      if (doc == null) return null;
+      if (doc == null)
+        return null;
     }
 
     return doc;
Index: src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java	(working copy)
@@ -26,8 +26,8 @@
     for (int i = 0; i < classes.length; i++) {
       final String clazz = classes[i];
       try {
-        final Class<NutchIndexWriter> implClass =
-          (Class<NutchIndexWriter>) Class.forName(clazz);
+        final Class<NutchIndexWriter> implClass = (Class<NutchIndexWriter>) Class
+            .forName(clazz);
         writers[i] = implClass.newInstance();
       } catch (final Exception e) {
         throw new RuntimeException("Couldn't create " + clazz, e);
@@ -37,7 +37,7 @@
   }
 
   public static void addClassToConf(Configuration conf,
-                                    Class<? extends NutchIndexWriter> clazz) {
+      Class<? extends NutchIndexWriter> clazz) {
     final String classes = conf.get("indexer.writer.classes");
     final String newClass = clazz.getName();
 
Index: src/java/org/apache/nutch/plugin/PluginRuntimeException.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(working copy)
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * <code>PluginRuntimeException</code> will be thrown until a exception in the
  * plugin managemnt occurs.
Index: src/java/org/apache/nutch/plugin/CircularDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/CircularDependencyException.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/CircularDependencyException.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.plugin;
 
-
 /**
  * <code>CircularDependencyException</code> will be thrown if a circular
  * dependency is detected.
Index: src/java/org/apache/nutch/plugin/Pluggable.java
===================================================================
--- src/java/org/apache/nutch/plugin/Pluggable.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/Pluggable.java	(working copy)
@@ -17,16 +17,15 @@
 package org.apache.nutch.plugin;
 
 /**
- * Defines the capability of a class to be plugged into Nutch.
- * This is a common interface that must be implemented by all
- * Nutch Extension Points.
- *
+ * Defines the capability of a class to be plugged into Nutch. This is a common
+ * interface that must be implemented by all Nutch Extension Points.
+ * 
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
- * @see <a href="package-summary.html#package_description">
- *      plugin package description</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
  */
 public interface Pluggable {
-  
+
 }
Index: src/java/org/apache/nutch/plugin/PluginManifestParser.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginManifestParser.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginManifestParser.java	(working copy)
@@ -39,8 +39,8 @@
 import org.xml.sax.SAXException;
 
 /**
- * The <code>PluginManifestParser</code> parser just parse the manifest file
- * in all plugin directories.
+ * The <code>PluginManifestParser</code> parser just parses the manifest file in
+ * all plugin directories.
  * 
  * @author joa23
  */
@@ -185,7 +185,7 @@
     PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
         providerName, pluginClazz, pPath, this.conf);
     LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version
-          + " provider=" + providerName + "class=" + pluginClazz);
+        + " provider=" + providerName + "class=" + pluginClazz);
     parseExtension(rootElement, pluginDescriptor);
     parseExtensionPoints(rootElement, pluginDescriptor);
     parseLibraries(rootElement, pluginDescriptor);
@@ -292,8 +292,8 @@
             if (parameters != null) {
               for (int k = 0; k < parameters.getLength(); k++) {
                 Element param = (Element) parameters.item(k);
-                extension.addAttribute(param.getAttribute(ATTR_NAME), param
-                    .getAttribute("value"));
+                extension.addAttribute(param.getAttribute(ATTR_NAME),
+                    param.getAttribute("value"));
               }
             }
             pPluginDescriptor.addExtension(extension);
Index: src/java/org/apache/nutch/plugin/ExtensionPoint.java
===================================================================
--- src/java/org/apache/nutch/plugin/ExtensionPoint.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/ExtensionPoint.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 import java.util.ArrayList;
 
 /**
@@ -76,7 +77,8 @@
   /**
    * Sets the extensionPointId.
    * 
-   * @param pId extension point id
+   * @param pId
+   *          extension point id
    */
   private void setId(String pId) {
     ftId = pId;
Index: src/java/org/apache/nutch/plugin/MissingDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/MissingDependencyException.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/MissingDependencyException.java	(working copy)
@@ -17,8 +17,8 @@
 package org.apache.nutch.plugin;
 
 /**
- * <code>MissingDependencyException</code> will be thrown if a plugin
- * dependency cannot be found.
+ * <code>MissingDependencyException</code> will be thrown if a plugin dependency
+ * cannot be found.
  * 
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/plugin/Extension.java
===================================================================
--- src/java/org/apache/nutch/plugin/Extension.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/Extension.java	(working copy)
@@ -98,8 +98,10 @@
    * Adds a attribute and is only used until model creation at plugin system
    * start up.
    * 
-   * @param pKey a key
-   * @param pValue a value
+   * @param pKey
+   *          a key
+   * @param pValue
+   *          a value
    */
   public void addAttribute(String pKey, String pValue) {
     fAttributes.put(pKey, pValue);
@@ -109,7 +111,8 @@
    * Sets the Class that implement the concret extension and is only used until
    * model creation at system start up.
    * 
-   * @param extensionClazz The extensionClasname to set
+   * @param extensionClazz
+   *          The extension class name to set
    */
   public void setClazz(String extensionClazz) {
     fClazz = extensionClazz;
@@ -119,7 +122,8 @@
    * Sets the unique extension Id and is only used until model creation at
    * system start up.
    * 
-   * @param extensionID The extensionID to set
+   * @param extensionID
+   *          The extensionID to set
    */
   public void setId(String extensionID) {
     fId = extensionID;
Index: src/java/org/apache/nutch/plugin/PluginDescriptor.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginDescriptor.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginDescriptor.java	(working copy)
@@ -30,12 +30,11 @@
 import org.apache.hadoop.conf.Configuration;
 
 /**
- * The <code>PluginDescriptor</code> provide access to all meta information of
- * a nutch-plugin, as well to the internationalizable resources and the plugin
- * own classloader. There are meta information about <code>Plugin</code>,
- * <code>ExtensionPoint</code> and <code>Extension</code>. To provide
- * access to the meta data of a plugin via a descriptor allow a lazy loading
- * mechanism.
+ * The <code>PluginDescriptor</code> provides access to all meta information of
+ * a nutch-plugin, as well as to the internationalizable resources and the
+ * plugin's own classloader. There is meta information about <code>Plugin</code>,
+ * <code>ExtensionPoint</code> and <code>Extension</code>. Providing access to
+ * the meta data of a plugin via a descriptor allows a lazy loading mechanism.
  * 
  * @author joa23
  */
@@ -53,7 +52,8 @@
   private ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
   private ArrayList<Extension> fExtensions = new ArrayList<Extension>();
   private PluginClassLoader fClassLoader;
-  public static final Logger LOG = LoggerFactory.getLogger(PluginDescriptor.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
   private Configuration fConf;
 
   /**
@@ -206,7 +206,8 @@
   /**
    * Adds a dependency
    * 
-   * @param pId id of the dependent plugin
+   * @param pId
+   *          id of the dependent plugin
    */
   public void addDependency(String pId) {
     fDependencies.add(pId);
@@ -285,8 +286,8 @@
       LOG.debug(getPluginId() + " " + e.toString());
     }
     URL[] urls = arrayList.toArray(new URL[arrayList.size()]);
-    fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
-        .getClassLoader());
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
     return fClassLoader;
   }
 
@@ -308,7 +309,7 @@
     for (String id : pDescriptor.getDependencies()) {
       PluginDescriptor descriptor = PluginRepository.get(fConf)
           .getPluginDescriptor(id);
-      for (URL url: descriptor.getExportedLibUrls()) {
+      for (URL url : descriptor.getExportedLibUrls()) {
         pLibs.add(url);
       }
       collectLibs(pLibs, descriptor);
Index: src/java/org/apache/nutch/plugin/Plugin.java
===================================================================
--- src/java/org/apache/nutch/plugin/Plugin.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/Plugin.java	(working copy)
@@ -33,8 +33,8 @@
  * The <code>Plugin</code> will be startuped and shutdown by the nutch plugin
  * management system.
  * 
- * A possible usecase of the <code>Plugin</code> implementation is to create
- * or close a database connection.
+ * A possible use case of the <code>Plugin</code> implementation is to create
+ * or close a database connection.
  * 
  * @author joa23
  */
@@ -81,7 +81,8 @@
   }
 
   /**
-   * @param descriptor The descriptor to set
+   * @param descriptor
+   *          The descriptor to set
    */
   private void setDescriptor(PluginDescriptor descriptor) {
     fDescriptor = descriptor;
Index: src/java/org/apache/nutch/plugin/PluginRepository.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRepository.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginRepository.java	(working copy)
@@ -55,7 +55,8 @@
 
   private Configuration conf;
 
-  public static final Logger LOG = LoggerFactory.getLogger(PluginRepository.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
 
   /**
    * @throws PluginRuntimeException
@@ -80,7 +81,7 @@
     try {
       installExtensions(fRegisteredPlugins);
     } catch (PluginRuntimeException e) {
-        LOG.error(e.toString());
+      LOG.error(e.toString());
       throw new RuntimeException(e.getMessage());
     }
     displayStatus();
@@ -107,8 +108,8 @@
       return;
     }
 
-    for (PluginDescriptor plugin: plugins) {
-      for(ExtensionPoint point:plugin.getExtenstionPoints()) {
+    for (PluginDescriptor plugin : plugins) {
+      for (ExtensionPoint point : plugin.getExtenstionPoints()) {
         String xpId = point.getId();
         LOG.debug("Adding extension point " + xpId);
         fExtensionPoints.put(xpId, point);
@@ -123,7 +124,7 @@
       throws PluginRuntimeException {
 
     for (PluginDescriptor descriptor : pRegisteredPlugins) {
-      for(Extension extension:descriptor.getExtensions()) {
+      for (Extension extension : descriptor.getExtensions()) {
         String xpId = extension.getTargetPoint();
         ExtensionPoint point = getExtensionPoint(xpId);
         if (point == null) {
@@ -151,7 +152,7 @@
     branch.put(plugin.getPluginId(), plugin);
 
     // Otherwise, checks each dependency
-    for(String id:plugin.getDependencies()) {
+    for (String id : plugin.getDependencies()) {
       PluginDescriptor dependency = plugins.get(id);
       if (dependency == null) {
         throw new MissingDependencyException("Missing dependency " + id
Index: src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(revision 1188252)
+++ src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(working copy)
@@ -30,12 +30,12 @@
 import org.apache.nutch.parse.Parse;
 
 /**
- * Indexing filter that indexes all inbound anchor text for a document. 
+ * Indexing filter that indexes all inbound anchor text for a document.
  */
-public class AnchorIndexingFilter
-  implements IndexingFilter {
+public class AnchorIndexingFilter implements IndexingFilter {
 
-  public static final Logger LOG = LoggerFactory.getLogger(AnchorIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(AnchorIndexingFilter.class);
   private Configuration conf;
   private boolean deduplicate = false;
 
@@ -50,14 +50,13 @@
     return this.conf;
   }
 
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum,
-    Inlinks inlinks) throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
 
-    String[] anchors = (inlinks != null ? inlinks.getAnchors()
-      : new String[0]);
+    String[] anchors = (inlinks != null ? inlinks.getAnchors() : new String[0]);
 
     // https://issues.apache.org/jira/browse/NUTCH-1037
-    WeakHashMap<String,Integer> map = new WeakHashMap<String,Integer>();
+    WeakHashMap<String, Integer> map = new WeakHashMap<String, Integer>();
 
     for (int i = 0; i < anchors.length; i++) {
       if (deduplicate) {
@@ -77,5 +76,4 @@
 
     return doc;
   }
-
 }
Index: src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java
===================================================================
--- src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java	(revision 1188252)
+++ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.microformats.reltag;
 
-
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
@@ -29,29 +28,27 @@
 // Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
-
 /**
- * An {@link org.apache.nutch.indexer.IndexingFilter} that 
- * add <code>tag</code> field(s) to the document.
- *
+ * An {@link org.apache.nutch.indexer.IndexingFilter} that adds <code>tag</code>
+ * field(s) to the document.
+ * 
  * @see <a href="http://www.microformats.org/wiki/rel-tag">
  *      http://www.microformats.org/wiki/rel-tag</a>
  * @author J&eacute;r&ocirc;me Charron
  */
 public class RelTagIndexingFilter implements IndexingFilter {
-  
 
   private Configuration conf;
 
-
   // Inherited JavaDoc
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
-    throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
 
     // Check if some Rel-Tags found, possibly put there by RelTagParser
-    String[] tags = parse.getData().getParseMeta().getValues(RelTagParser.REL_TAG);
+    String[] tags = parse.getData().getParseMeta()
+        .getValues(RelTagParser.REL_TAG);
     if (tags != null) {
-      for (int i=0; i<tags.length; i++) {
+      for (int i = 0; i < tags.length; i++) {
         doc.add("tag", tags[i]);
       }
     }
@@ -59,10 +56,11 @@
     return doc;
   }
 
-  /* ----------------------------- *
-   * <implementation:Configurable> *
-   * ----------------------------- */
-  
+  /*
+   * ----------------------------- * <implementation:Configurable> *
+   * -----------------------------
+   */
+
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
@@ -70,9 +68,10 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
-  /* ------------------------------ *
-   * </implementation:Configurable> *
-   * ------------------------------ */
-  
+
+  /*
+   * ------------------------------ * </implementation:Configurable> *
+   * ------------------------------
+   */
+
 }
Index: src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
===================================================================
--- src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java	(revision 1188252)
+++ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java	(working copy)
@@ -44,30 +44,27 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 
-
 /**
  * Adds microformat rel-tags of document if found.
- *
+ * 
  * @see <a href="http://www.microformats.org/wiki/rel-tag">
  *      http://www.microformats.org/wiki/rel-tag</a>
  * @author J&eacute;r&ocirc;me Charron
  */
 public class RelTagParser implements HtmlParseFilter {
-  
+
   public final static Logger LOG = LoggerFactory.getLogger(RelTagParser.class);
 
   public final static String REL_TAG = "Rel-Tag";
-  
-  
+
   private Configuration conf = null;
-  
-  
+
   /**
    * Scan the HTML document looking at possible rel-tags
    */
   public ParseResult filter(Content content, ParseResult parseResult,
-    HTMLMetaTags metaTags, DocumentFragment doc) {
-    
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+
     // get parse obj
     Parse parse = parseResult.get(content.getUrl());
     // Trying to find the document's rel-tags
@@ -84,16 +81,16 @@
   private static class Parser {
 
     Set tags = null;
-    
+
     Parser(Node node) {
       tags = new TreeSet();
       parse(node);
     }
-  
+
     Set getRelTags() {
       return tags;
     }
-    
+
     void parse(Node node) {
 
       if (node.getNodeType() == Node.ELEMENT_NODE) {
@@ -117,34 +114,35 @@
           }
         }
       }
-      
+
       // Recurse
       NodeList children = node.getChildNodes();
-      for (int i=0; children != null && i<children.getLength(); i++) {
+      for (int i = 0; children != null && i < children.getLength(); i++) {
         parse(children.item(i));
       }
     }
-    
+
     private final static String parseTag(String url) {
       String tag = null;
       try {
         URL u = new URL(url);
         String path = u.getPath();
-        tag = URLDecoder.decode(path.substring(path.lastIndexOf('/') + 1), "UTF-8");
+        tag = URLDecoder.decode(path.substring(path.lastIndexOf('/') + 1),
+            "UTF-8");
       } catch (Exception e) {
         // Malformed tag...
         tag = null;
       }
       return tag;
     }
-    
+
   }
 
+  /*
+   * ----------------------------- * <implementation:Configurable> *
+   * -----------------------------
+   */
 
-  /* ----------------------------- *
-   * <implementation:Configurable> *
-   * ----------------------------- */
-  
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
@@ -152,9 +150,10 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
-  /* ------------------------------ *
-   * </implementation:Configurable> *
-   * ------------------------------ */
-  
+
+  /*
+   * ------------------------------ * </implementation:Configurable> *
+   * ------------------------------
+   */
+
 }
Index: src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java
===================================================================
--- src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java	(revision 1188252)
+++ src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java	(working copy)
@@ -35,9 +35,9 @@
 import org.apache.nutch.util.domain.DomainSuffix;
 import org.apache.nutch.util.domain.DomainSuffixes;
 
-
 /**
  * Scoring filter to boost tlds.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class TLDScoringFilter implements ScoringFilter {
@@ -56,10 +56,10 @@
     NutchField tlds = doc.getField("tld");
     float boost = 1.0f;
 
-    if(tlds != null) {
-      for(Object tld : tlds.getValues()) {
+    if (tlds != null) {
+      for (Object tld : tlds.getValues()) {
         DomainSuffix entry = tldEntries.get(tld.toString());
-        if(entry != null)
+        if (entry != null)
           boost *= entry.getBoost();
       }
     }
@@ -93,9 +93,8 @@
       throws ScoringFilterException {
   }
 
-  public void updateDbScore(Text url, CrawlDatum old,
-                            CrawlDatum datum, List<CrawlDatum> inlinked)
-  throws ScoringFilterException {
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException {
   }
 
   public Configuration getConf() {
@@ -105,9 +104,10 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, 
-          Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust,
-          int allCount) throws ScoringFilterException {
+
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
     return adjust;
   }
 
Index: src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java
===================================================================
--- src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java	(revision 1188252)
+++ src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java	(working copy)
@@ -34,23 +34,25 @@
 
 /**
  * Adds the Top level domain extensions to the index
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class TLDIndexingFilter implements IndexingFilter {
-  public static final Logger LOG = LoggerFactory.getLogger(TLDIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(TLDIndexingFilter.class);
 
   private Configuration conf;
 
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text urlText, CrawlDatum datum, Inlinks inlinks)
-  throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text urlText,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
 
     try {
       URL url = new URL(urlText.toString());
       DomainSuffix d = URLUtil.getDomainSuffix(url);
-      
+
       doc.add("tld", d.getDomain());
-      
-    }catch (Exception ex) {
+
+    } catch (Exception ex) {
       LOG.warn(ex.toString());
     }
 
Index: src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
===================================================================
--- src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(revision 1188252)
+++ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.parse.js;
 
 import java.io.BufferedReader;
@@ -57,11 +57,10 @@
 import org.w3c.dom.NodeList;
 
 /**
- * This class is a heuristic link extractor for JavaScript files and
- * code snippets. The general idea of a two-pass regex matching comes from
- * Heritrix. Parts of the code come from OutlinkExtractor.java
- * by Stephan Strittmatter.
- *
+ * This class is a heuristic link extractor for JavaScript files and code
+ * snippets. The general idea of two-pass regex matching comes from Heritrix.
+ * Parts of the code come from OutlinkExtractor.java by Stephan Strittmatter.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class JSParseFilter implements HtmlParseFilter, Parser {
@@ -70,9 +69,9 @@
   private static final int MAX_TITLE_LEN = 80;
 
   private Configuration conf;
-  
+
   public ParseResult filter(Content content, ParseResult parseResult,
-    HTMLMetaTags metaTags, DocumentFragment doc) {
+      HTMLMetaTags metaTags, DocumentFragment doc) {
 
     Parse parse = parseResult.get(content.getUrl());
 
@@ -86,37 +85,43 @@
       outlinks.addAll(list);
       ParseStatus status = parse.getData().getStatus();
       String text = parse.getText();
-      Outlink[] newlinks = (Outlink[])outlinks.toArray(new Outlink[outlinks.size()]);
-      ParseData parseData = new ParseData(status, title, newlinks,
-                                          parse.getData().getContentMeta(),
-                                          parse.getData().getParseMeta());
+      Outlink[] newlinks = (Outlink[]) outlinks.toArray(new Outlink[outlinks
+          .size()]);
+      ParseData parseData = new ParseData(status, title, newlinks, parse
+          .getData().getContentMeta(), parse.getData().getParseMeta());
 
       // replace original parse obj with new one
       parseResult.put(content.getUrl(), new ParseText(text), parseData);
     }
     return parseResult;
   }
-  
-  private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base, List outlinks) {
+
+  private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base,
+      List outlinks) {
     if (n instanceof Element) {
       String name = n.getNodeName();
       if (name.equalsIgnoreCase("script")) {
         String lang = null;
         Node lNode = n.getAttributes().getNamedItem("language");
-        if (lNode == null) lang = "javascript";
-        else lang = lNode.getNodeValue();
+        if (lNode == null)
+          