Index: eclipse-codeformat.xml
===================================================================
--- eclipse-codeformat.xml	(revision 0)
+++ eclipse-codeformat.xml	(revision 0)
@@ -0,0 +1,269 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<profiles version="11">
+<profile kind="CodeFormatterProfile" name="Nutch" version="11">
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_ellipsis" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.source" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_line_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.join_wrapped_lines" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_parameter_description" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.lineSplit" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.indentation.size" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_assignment" value="0"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.assertIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.problem.enumIdentifier" value="error"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_ellipsis" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.indent_root_tags" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_empty_lines" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.compiler.compliance" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="2"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode" value="enabled"/>
+<setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_javadoc_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.line_length" value="80"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_import_groups" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.wrap_before_binary_operator" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.join_lines_in_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_html" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_source_code" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.compiler.codegen.targetPlatform" value="1.5"/>
+<setting id="org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_header" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.comment.format_block_comments" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.alignment_for_enum_constants" value="0"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="end_of_line"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="1"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/>
+<setting id="org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column" value="false"/>
+<setting id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/>
+</profile>
+</profiles>
Index: src/test/org/apache/nutch/fetcher/TestFetcher.java
===================================================================
--- src/test/org/apache/nutch/fetcher/TestFetcher.java	(revision 1188252)
+++ src/test/org/apache/nutch/fetcher/TestFetcher.java	(working copy)
@@ -37,18 +37,15 @@
 import junit.framework.TestCase;
 
 /**
- * Basic fetcher test
- * 1. generate seedlist
- * 2. inject
- * 3. generate
- * 3. fetch
- * 4. Verify contents
+ * Basic fetcher test 1. generate seedlist 2. inject 3. generate 4. fetch 5.
+ * Verify contents
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
- *
+ * 
  */
 public class TestFetcher extends TestCase {
 
-  final static Path testdir=new Path("build/test/fetch-test");
+  final static Path testdir = new Path("build/test/fetch-test");
   Configuration conf;
   FileSystem fs;
   Path crawldbPath;
@@ -56,109 +53,115 @@
   Path urlPath;
   Server server;
 
-  protected void setUp() throws Exception{
-    conf=CrawlDBTestUtil.createConfiguration();
-    fs=FileSystem.get(conf);
+  protected void setUp() throws Exception {
+    conf = CrawlDBTestUtil.createConfiguration();
+    fs = FileSystem.get(conf);
     fs.delete(testdir, true);
-    urlPath=new Path(testdir,"urls");
-    crawldbPath=new Path(testdir,"crawldb");
-    segmentsPath=new Path(testdir,"segments");
-    server=CrawlDBTestUtil.getServer(conf.getInt("content.server.port",50000), "build/test/data/fetch-test-site");
+    urlPath = new Path(testdir, "urls");
+    crawldbPath = new Path(testdir, "crawldb");
+    segmentsPath = new Path(testdir, "segments");
+    server = CrawlDBTestUtil.getServer(
+        conf.getInt("content.server.port", 50000),
+        "build/test/data/fetch-test-site");
     server.start();
   }
 
-  protected void tearDown() throws Exception{
+  protected void tearDown() throws Exception {
     server.stop();
     fs.delete(testdir, true);
   }
-  
+
   public void testFetch() throws IOException {
-    
-    //generate seedlist
-    ArrayList<String> urls=new ArrayList<String>();
-    
-    addUrl(urls,"index.html");
-    addUrl(urls,"pagea.html");
-    addUrl(urls,"pageb.html");
-    addUrl(urls,"dup_of_pagea.html");
-    addUrl(urls,"nested_spider_trap.html");
-    addUrl(urls,"exception.html");
-    
+
+    // generate seedlist
+    ArrayList<String> urls = new ArrayList<String>();
+
+    addUrl(urls, "index.html");
+    addUrl(urls, "pagea.html");
+    addUrl(urls, "pageb.html");
+    addUrl(urls, "dup_of_pagea.html");
+    addUrl(urls, "nested_spider_trap.html");
+    addUrl(urls, "exception.html");
+
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls);
-    
-    //inject
-    Injector injector=new Injector(conf);
+
+    // inject
+    Injector injector = new Injector(conf);
     injector.inject(crawldbPath, urlPath);
 
-    //generate
-    Generator g=new Generator(conf);
+    // generate
+    Generator g = new Generator(conf);
     Path[] generatedSegment = g.generate(crawldbPath, segmentsPath, 1,
         Long.MAX_VALUE, Long.MAX_VALUE, false, false);
 
-    long time=System.currentTimeMillis();
-    //fetch
-    Fetcher fetcher=new Fetcher(conf);
+    long time = System.currentTimeMillis();
+    // fetch
+    Fetcher fetcher = new Fetcher(conf);
 
     // Set fetcher.parse to true
     conf.setBoolean("fetcher.parse", true);
 
     fetcher.fetch(generatedSegment[0], 1);
 
-    time=System.currentTimeMillis()-time;
-    
-    //verify politeness, time taken should be more than (num_of_pages +1)*delay
-    int minimumTime=(int) ((urls.size()+1)*1000*conf.getFloat("fetcher.server.delay",5));
+    time = System.currentTimeMillis() - time;
+
+    // verify politeness, time taken should be more than (num_of_pages +1)*delay
+    int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat(
+        "fetcher.server.delay", 5));
     assertTrue(time > minimumTime);
-    
-    //verify content
-    Path content=new Path(new Path(generatedSegment[0], Content.DIR_NAME),"part-00000/data");
-    SequenceFile.Reader reader=new SequenceFile.Reader(fs, content, conf);
-    
-    ArrayList<String> handledurls=new ArrayList<String>();
-    
-    READ_CONTENT:
-      do {
-      Text key=new Text();
-      Content value=new Content();
-      if(!reader.next(key, value)) break READ_CONTENT;
-      String contentString=new String(value.getContent());
-      if(contentString.indexOf("Nutch fetcher test page")!=-1) { 
+
+    // verify content
+    Path content = new Path(new Path(generatedSegment[0], Content.DIR_NAME),
+        "part-00000/data");
+    SequenceFile.Reader reader = new SequenceFile.Reader(fs, content, conf);
+
+    ArrayList<String> handledurls = new ArrayList<String>();
+
+    READ_CONTENT: do {
+      Text key = new Text();
+      Content value = new Content();
+      if (!reader.next(key, value))
+        break READ_CONTENT;
+      String contentString = new String(value.getContent());
+      if (contentString.indexOf("Nutch fetcher test page") != -1) {
         handledurls.add(key.toString());
       }
-    } while(true);
+    } while (true);
 
     reader.close();
 
     Collections.sort(urls);
     Collections.sort(handledurls);
 
-    //verify that enough pages were handled
+    // verify that enough pages were handled
     assertEquals(urls.size(), handledurls.size());
 
-    //verify that correct pages were handled
+    // verify that correct pages were handled
     assertTrue(handledurls.containsAll(urls));
     assertTrue(urls.containsAll(handledurls));
-    
+
     handledurls.clear();
 
-    //verify parse data
-    Path parseData = new Path(new Path(generatedSegment[0], ParseData.DIR_NAME),"part-00000/data");
+    // verify parse data
+    Path parseData = new Path(
+        new Path(generatedSegment[0], ParseData.DIR_NAME), "part-00000/data");
     reader = new SequenceFile.Reader(fs, parseData, conf);
-    
-    READ_PARSE_DATA:
-      do {
+
+    READ_PARSE_DATA: do {
       Text key = new Text();
       ParseData value = new ParseData();
-      if(!reader.next(key, value)) break READ_PARSE_DATA;
-      // make sure they all contain "nutch.segment.name" and "nutch.content.digest" 
+      if (!reader.next(key, value))
+        break READ_PARSE_DATA;
+      // make sure they all contain "nutch.segment.name" and
+      // "nutch.content.digest"
       // keys in parse metadata
       Metadata contentMeta = value.getContentMeta();
-      if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null 
-            && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
+      if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null
+          && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
         handledurls.add(key.toString());
       }
-    } while(true);
-    
+    } while (true);
+
     Collections.sort(handledurls);
 
     assertEquals(urls.size(), handledurls.size());
@@ -168,9 +171,10 @@
   }
 
   private void addUrl(ArrayList<String> urls, String page) {
-    urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + page);
+    urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/"
+        + page);
   }
-  
+
   public void testAgentNameCheck() {
 
     boolean failedNoAgentName = false;
Index: src/test/org/apache/nutch/metadata/TestMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestMetadata.java	(revision 1188252)
+++ src/test/org/apache/nutch/metadata/TestMetadata.java	(working copy)
@@ -45,7 +45,7 @@
   public static void main(String[] args) {
     TestRunner.run(suite());
   }
-  
+
   /**
    * Test to ensure that only non-null values get written when the
    * {@link Metadata} object is written using a Writeable.
@@ -282,4 +282,3 @@
   }
 
 }
-
Index: src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
===================================================================
--- src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java	(revision 1188252)
+++ src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java	(working copy)
@@ -30,7 +30,7 @@
 /**
  * JUnit based tests of class
  * {@link org.apache.nutch.metadata.SpellCheckedMetadata}.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
@@ -52,20 +52,20 @@
 
   /** Test for the <code>getNormalizedName(String)</code> method. */
   public void testGetNormalizedName() {
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("Content-Type"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("ContentType"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("Content-type"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contenttype"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contentype"));
-    assertEquals("Content-Type", SpellCheckedMetadata
-        .getNormalizedName("contntype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("Content-Type"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("ContentType"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("Content-type"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contenttype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contentype"));
+    assertEquals("Content-Type",
+        SpellCheckedMetadata.getNormalizedName("contntype"));
   }
-  
+
   /** Test for the <code>add(String, String)</code> method. */
   public void testAdd() {
     String[] values = null;
@@ -256,8 +256,8 @@
   }
 
   /**
-   * IO Test method, usable only when you plan to do changes in metadata
-   * to measure relative performance impact.
+   * IO Test method, usable only when you plan to do changes in metadata to
+   * measure relative performance impact.
    */
   public final void testHandlingSpeed() {
     SpellCheckedMetadata result;
Index: src/test/org/apache/nutch/protocol/TestProtocolFactory.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestProtocolFactory.java	(revision 1188252)
+++ src/test/org/apache/nutch/protocol/TestProtocolFactory.java	(working copy)
@@ -26,55 +26,56 @@
 
   Configuration conf;
   ProtocolFactory factory;
-  
+
   protected void setUp() throws Exception {
     conf = NutchConfiguration.create();
     conf.set("plugin.includes", ".*");
     conf.set("http.agent.name", "test-bot");
-    factory=new ProtocolFactory(conf);
+    factory = new ProtocolFactory(conf);
   }
 
-  public void testGetProtocol(){
+  public void testGetProtocol() {
 
-    //non existing protocol
+    // non existing protocol
     try {
       factory.getProtocol("xyzxyz://somehost");
       fail("Must throw ProtocolNotFound");
     } catch (ProtocolNotFound e) {
-      //all is ok
-    } catch (Exception ex){
+      // all is ok
+    } catch (Exception ex) {
       fail("Must not throw any other exception");
     }
-    
-    Protocol httpProtocol=null;
-    
-    //existing protocol
+
+    Protocol httpProtocol = null;
+
+    // existing protocol
     try {
-      httpProtocol=factory.getProtocol("http://somehost");
+      httpProtocol = factory.getProtocol("http://somehost");
       assertNotNull(httpProtocol);
-    } catch (Exception ex){
+    } catch (Exception ex) {
       fail("Must not throw any other exception");
     }
 
-    //cache key
-    Object protocol = ObjectCache.get(conf).getObject(Protocol.X_POINT_ID + "http");
+    // cache key
+    Object protocol = ObjectCache.get(conf).getObject(
+        Protocol.X_POINT_ID + "http");
     assertNotNull(protocol);
     assertEquals(httpProtocol, protocol);
-    
-    //test same object instance
+
+    // test same object instance
     try {
-      assertTrue(httpProtocol==factory.getProtocol("http://somehost"));
+      assertTrue(httpProtocol == factory.getProtocol("http://somehost"));
     } catch (ProtocolNotFound e) {
       fail("Must not throw any exception");
     }
   }
-  
-  public void testContains(){
+
+  public void testContains() {
     assertTrue(factory.contains("http", "http"));
     assertTrue(factory.contains("http", "http,ftp"));
     assertTrue(factory.contains("http", "   http ,   ftp"));
     assertTrue(factory.contains("smb", "ftp,smb,http"));
     assertFalse(factory.contains("smb", "smbb"));
   }
-  
+
 }
Index: src/test/org/apache/nutch/protocol/TestContent.java
===================================================================
--- src/test/org/apache/nutch/protocol/TestContent.java	(revision 1188252)
+++ src/test/org/apache/nutch/protocol/TestContent.java	(working copy)
@@ -26,14 +26,15 @@
 
 import junit.framework.TestCase;
 
-
 /** Unit tests for Content. */
 
 public class TestContent extends TestCase {
 
   private static Configuration conf = NutchConfiguration.create();
 
-  public TestContent(String name) { super(name); }
+  public TestContent(String name) {
+    super(name);
+  }
 
   public void testContent() throws Exception {
 
@@ -46,7 +47,7 @@
     metaData.add("Content-Type", "text/html");
 
     Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
-                            metaData, conf);
+        metaData, conf);
 
     WritableTestUtils.testWritable(r);
     assertEquals("text/html", r.getMetadata().get("Content-Type"));
@@ -59,52 +60,36 @@
     Content c = null;
     Metadata p = new Metadata();
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "text/html; charset=UTF-8", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), "text/html; charset=UTF-8", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "".getBytes("UTF8"), "", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    null, p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "".getBytes("UTF8"), null, p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.html",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "text/plain", p, conf);
+    c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/foo.png",
-                    "http://www.foo.com/",
-                    "<html></html>".getBytes("UTF8"),
-                    "text/plain", p, conf);
+    c = new Content("http://www.foo.com/foo.png", "http://www.foo.com/",
+        "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
     assertEquals("text/html", c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    "", p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), "", p, conf);
     assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
 
-    c = new Content("http://www.foo.com/",
-                    "http://www.foo.com/",
-                    "".getBytes("UTF8"),
-                    null, p, conf);
+    c = new Content("http://www.foo.com/", "http://www.foo.com/",
+        "".getBytes("UTF8"), null, p, conf);
     assertNotNull(c.getContentType());
   }
 
Index: src/test/org/apache/nutch/segment/TestSegmentMerger.java
===================================================================
--- src/test/org/apache/nutch/segment/TestSegmentMerger.java	(revision 1188252)
+++ src/test/org/apache/nutch/segment/TestSegmentMerger.java	(working copy)
@@ -38,12 +38,13 @@
   Path seg2;
   Path out;
   int countSeg1, countSeg2;
-  
+
   public void setUp() throws Exception {
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
     long blkSize = fs.getDefaultBlockSize();
-    testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis());
+    testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-"
+        + System.currentTimeMillis());
     seg1 = new Path(testDir, "seg1");
     seg2 = new Path(testDir, "seg2");
     out = new Path(testDir, "out");
@@ -52,7 +53,8 @@
     DecimalFormat df = new DecimalFormat("0000000");
     Text k = new Text();
     Path ptPath = new Path(new Path(seg1, ParseText.DIR_NAME), "part-00000");
-    MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
+    MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(),
+        Text.class, ParseText.class);
     long curSize = 0;
     countSeg1 = 0;
     while (curSize < blkSize * 2) {
@@ -65,7 +67,8 @@
     System.err.println(" - done: " + countSeg1 + " records.");
     System.err.println("Creating large segment 2...");
     ptPath = new Path(new Path(seg2, ParseText.DIR_NAME), "part-00000");
-    w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
+    w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class,
+        ParseText.class);
     curSize = 0;
     countSeg2 = 0;
     while (curSize < blkSize * 2) {
@@ -77,14 +80,14 @@
     w.close();
     System.err.println(" - done: " + countSeg2 + " records.");
   }
-  
+
   public void tearDown() throws Exception {
     fs.delete(testDir, true);
   }
-  
+
   public void testLargeMerge() throws Exception {
     SegmentMerger merger = new SegmentMerger(conf);
-    merger.merge(out, new Path[]{seg1, seg2}, false, false, -1);
+    merger.merge(out, new Path[] { seg1, seg2 }, false, false, -1);
     // verify output
     FileStatus[] stats = fs.listStatus(out);
     // there should be just one path
@@ -92,7 +95,8 @@
     Path outSeg = stats[0].getPath();
     Text k = new Text();
     ParseText v = new ParseText();
-    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(outSeg, ParseText.DIR_NAME), conf);
+    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
+        outSeg, ParseText.DIR_NAME), conf);
     int cnt1 = 0, cnt2 = 0;
     for (MapFile.Reader r : readers) {
       while (r.next(k, v)) {
@@ -111,5 +115,4 @@
     assertEquals(countSeg1, cnt1);
     assertEquals(countSeg2, cnt2);
   }
-
 }
Index: src/test/org/apache/nutch/net/TestURLNormalizers.java
===================================================================
--- src/test/org/apache/nutch/net/TestURLNormalizers.java	(revision 1188252)
+++ src/test/org/apache/nutch/net/TestURLNormalizers.java	(working copy)
@@ -30,30 +30,38 @@
     String clazz1 = "org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer";
     String clazz2 = "org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer";
     conf.set("urlnormalizer.order", clazz1 + " " + clazz2);
-    
-    URLNormalizers normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT);
-    
+
+    URLNormalizers normalizers = new URLNormalizers(conf,
+        URLNormalizers.SCOPE_DEFAULT);
+
     assertNotNull(normalizers);
     try {
-      normalizers.normalize("http://www.example.com/", URLNormalizers.SCOPE_DEFAULT);
+      normalizers.normalize("http://www.example.com/",
+          URLNormalizers.SCOPE_DEFAULT);
     } catch (MalformedURLException mue) {
       fail(mue.toString());
     }
 
     // NUTCH-1011 - Get rid of superfluous slashes
     try {
-      String normalizedSlashes = normalizers.normalize("http://www.example.org//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT);
-      assertEquals(normalizedSlashes, "http://www.example.org/path/to/somewhere.html");
+      String normalizedSlashes = normalizers.normalize(
+          "http://www.example.org//path/to//somewhere.html",
+          URLNormalizers.SCOPE_DEFAULT);
+      assertEquals(normalizedSlashes,
+          "http://www.example.org/path/to/somewhere.html");
     } catch (MalformedURLException mue) {
       fail(mue.toString());
     }
 
     // check the order
     int pos1 = -1, pos2 = -1;
-    URLNormalizer[] impls = normalizers.getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
+    URLNormalizer[] impls = normalizers
+        .getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
     for (int i = 0; i < impls.length; i++) {
-      if (impls[i].getClass().getName().equals(clazz1)) pos1 = i;
-      if (impls[i].getClass().getName().equals(clazz2)) pos2 = i;
+      if (impls[i].getClass().getName().equals(clazz1))
+        pos1 = i;
+      if (impls[i].getClass().getName().equals(clazz2))
+        pos2 = i;
     }
     if (pos1 != -1 && pos2 != -1) {
       assertTrue("RegexURLNormalizer before BasicURLNormalizer", pos1 < pos2);
Index: src/test/org/apache/nutch/net/TestURLFilters.java
===================================================================
--- src/test/org/apache/nutch/net/TestURLFilters.java	(revision 1188252)
+++ src/test/org/apache/nutch/net/TestURLFilters.java	(working copy)
@@ -25,6 +25,7 @@
 
   /**
    * Testcase for NUTCH-325.
+   * 
    * @throws URLFilterException
    */
   public void testNonExistingUrlFilter() throws URLFilterException {
Index: src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java	(working copy)
@@ -32,18 +32,15 @@
 import junit.framework.TestCase;
 
 public class TestCrawlDbMerger extends TestCase {
-  private static final Logger LOG = Logger.getLogger(CrawlDbMerger.class.getName());
-  
+  private static final Logger LOG = Logger.getLogger(CrawlDbMerger.class
+      .getName());
+
   String url10 = "http://example.com/";
   String url11 = "http://example.com/foo";
   String url20 = "http://example.com/";
   String url21 = "http://example.com/bar";
-  String[] urls_expected = new String[] {
-          url10,
-          url11,
-          url21
-  };
-  
+  String[] urls_expected = new String[] { url10, url11, url21 };
+
   TreeSet init1 = new TreeSet();
   TreeSet init2 = new TreeSet();
   HashMap expected = new HashMap();
@@ -52,7 +49,7 @@
   FileSystem fs;
   Path testDir;
   CrawlDbReader reader;
-  
+
   public void setUp() throws Exception {
     init1.add(url10);
     init1.add(url11);
@@ -78,19 +75,20 @@
     expected.put(url21, cd2);
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
-    testDir = new Path("test-crawldb-" +
-            new java.util.Random().nextInt());
+    testDir = new Path("test-crawldb-" + new java.util.Random().nextInt());
     fs.mkdirs(testDir);
   }
-  
+
   public void tearDown() {
     try {
       if (fs.exists(testDir))
         fs.delete(testDir);
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
     try {
       reader.close();
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
   }
 
   public void testMerge() throws Exception {
@@ -101,15 +99,15 @@
     createCrawlDb(conf, fs, crawldb2, init2, cd2);
     CrawlDbMerger merger = new CrawlDbMerger(conf);
     LOG.fine("* merging crawldbs to " + output);
-    merger.merge(output, new Path[]{crawldb1, crawldb2}, false, false);
+    merger.merge(output, new Path[] { crawldb1, crawldb2 }, false, false);
     LOG.fine("* reading crawldb: " + output);
     reader = new CrawlDbReader();
     String crawlDb = output.toString();
     Iterator it = expected.keySet().iterator();
     while (it.hasNext()) {
-      String url = (String)it.next();
+      String url = (String) it.next();
       LOG.fine("url=" + url);
-      CrawlDatum cd = (CrawlDatum)expected.get(url);
+      CrawlDatum cd = (CrawlDatum) expected.get(url);
       CrawlDatum res = reader.get(crawlDb, url, conf);
       LOG.fine(" -> " + res);
       System.out.println("url=" + url);
@@ -122,14 +120,16 @@
     reader.close();
     fs.delete(testDir);
   }
-  
-  private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet init, CrawlDatum cd) throws Exception {
+
+  private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb,
+      TreeSet init, CrawlDatum cd) throws Exception {
     LOG.fine("* creating crawldb: " + crawldb);
     Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
-    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, CrawlDatum.class);
+    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir,
+        "part-00000").toString(), Text.class, CrawlDatum.class);
     Iterator it = init.iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       writer.append(new Text(key), cd);
     }
     writer.close();
Index: src/test/org/apache/nutch/crawl/DummyWritable.java
===================================================================
--- src/test/org/apache/nutch/crawl/DummyWritable.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/DummyWritable.java	(working copy)
@@ -21,12 +21,12 @@
 
 public class DummyWritable extends IntWritable {
 
-    public DummyWritable() {
+  public DummyWritable() {
 
-    }
+  }
 
-    public DummyWritable(int i) {
-        super(i);
-    }
+  public DummyWritable(int i) {
+    super(i);
+  }
 
 }
Index: src/test/org/apache/nutch/crawl/TestLinkDbMerger.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestLinkDbMerger.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestLinkDbMerger.java	(working copy)
@@ -33,41 +33,28 @@
 import junit.framework.TestCase;
 
 public class TestLinkDbMerger extends TestCase {
-  private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class.getName());
-  
+  private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class
+      .getName());
+
   String url10 = "http://example.com/foo";
-  String[] urls10 = new String[] {
-          "http://example.com/100",
-          "http://example.com/101"
-        };
+  String[] urls10 = new String[] { "http://example.com/100",
+      "http://example.com/101" };
 
   String url11 = "http://example.com/";
-  String[] urls11 = new String[] {
-          "http://example.com/110",
-          "http://example.com/111"
-        };
-  
+  String[] urls11 = new String[] { "http://example.com/110",
+      "http://example.com/111" };
+
   String url20 = "http://example.com/";
-  String[] urls20 = new String[] {
-          "http://foo.com/200",
-          "http://foo.com/201"
-        };
+  String[] urls20 = new String[] { "http://foo.com/200", "http://foo.com/201" };
   String url21 = "http://example.com/bar";
-  String[] urls21 = new String[] {
-          "http://foo.com/210",
-          "http://foo.com/211"
-        };
-  
+  String[] urls21 = new String[] { "http://foo.com/210", "http://foo.com/211" };
+
   String[] urls10_expected = urls10;
-  String[] urls11_expected = new String[] {
-          urls11[0],
-          urls11[1],
-          urls20[0],
-          urls20[1]
-  };
+  String[] urls11_expected = new String[] { urls11[0], urls11[1], urls20[0],
+      urls20[1] };
   String[] urls20_expected = urls11_expected;
   String[] urls21_expected = urls21;
-  
+
   TreeMap init1 = new TreeMap();
   TreeMap init2 = new TreeMap();
   HashMap expected = new HashMap();
@@ -75,7 +62,7 @@
   Path testDir;
   FileSystem fs;
   LinkDbReader reader;
-  
+
   public void setUp() throws Exception {
     init1.put(url10, urls10);
     init1.put(url11, urls11);
@@ -87,19 +74,21 @@
     expected.put(url21, urls21_expected);
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
-    testDir = new Path("build/test/test-linkdb-" +
-            new java.util.Random().nextInt());
+    testDir = new Path("build/test/test-linkdb-"
+        + new java.util.Random().nextInt());
     fs.mkdirs(testDir);
   }
-  
+
   public void tearDown() {
     try {
       if (fs.exists(testDir))
         fs.delete(testDir, true);
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
     try {
       reader.close();
-    } catch (Exception e) { }
+    } catch (Exception e) {
+    }
   }
 
   public void testMerge() throws Exception {
@@ -113,21 +102,21 @@
     createLinkDb(conf, fs, linkdb2, init2);
     LinkDbMerger merger = new LinkDbMerger(conf);
     LOG.fine("* merging linkdbs to " + output);
-    merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false);
+    merger.merge(output, new Path[] { linkdb1, linkdb2 }, false, false);
     LOG.fine("* reading linkdb: " + output);
     reader = new LinkDbReader(conf, output);
     Iterator it = expected.keySet().iterator();
     while (it.hasNext()) {
-      String url = (String)it.next();
+      String url = (String) it.next();
       LOG.fine("url=" + url);
-      String[] vals = (String[])expected.get(url);
+      String[] vals = (String[]) expected.get(url);
       Inlinks inlinks = reader.getInlinks(new Text(url));
       // may not be null
       assertNotNull(inlinks);
       ArrayList links = new ArrayList();
       Iterator it2 = inlinks.iterator();
       while (it2.hasNext()) {
-        Inlink in = (Inlink)it2.next();
+        Inlink in = (Inlink) it2.next();
         links.add(in.getFromUrl());
       }
       for (int i = 0; i < vals.length; i++) {
@@ -138,16 +127,18 @@
     reader.close();
     fs.delete(testDir, true);
   }
-  
-  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap init) throws Exception {
+
+  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb,
+      TreeMap init) throws Exception {
     LOG.fine("* creating linkdb: " + linkdb);
     Path dir = new Path(linkdb, LinkDb.CURRENT_NAME);
-    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class);
+    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir,
+        "part-00000").toString(), Text.class, Inlinks.class);
     Iterator it = init.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       Inlinks inlinks = new Inlinks();
-      String[] vals = (String[])init.get(key);
+      String[] vals = (String[]) init.get(key);
       for (int i = 0; i < vals.length; i++) {
         Inlink in = new Inlink(vals[i], vals[i]);
         inlinks.add(in);
Index: src/test/org/apache/nutch/crawl/TestGenerator.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestGenerator.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestGenerator.java	(working copy)
@@ -34,9 +34,9 @@
  * Basic generator test. 1. Insert entries in crawldb 2. Generates entries to
  * fetch 3. Verifies that number of generated urls match 4. Verifies that
  * highest scoring urls are generated
- *
+ * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
- *
+ * 
  */
 public class TestGenerator extends TestCase {
 
@@ -69,7 +69,7 @@
 
   /**
   * Test that generator generates fetchlist ordered by score (desc).
-   *
+   * 
    * @throws Exception
    */
   public void testGenerateHighest() throws Exception {
@@ -79,8 +79,7 @@
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
     for (int i = 0; i <= 100; i++) {
-      list.add(createURLCrawlDatum("http://aaa/" + pad(i),
-          1, i));
+      list.add(createURLCrawlDatum("http://aaa/" + pad(i), 1, i));
     }
 
     createCrawlDB(list);
@@ -91,7 +90,7 @@
         CrawlDatum.GENERATE_DIR_NAME), "part-00000");
 
     ArrayList<URLCrawlDatum> l = readContents(fetchlist);
-    
+
     // sort urls by score desc
     Collections.sort(l, new ScoreComparator());
 
@@ -129,17 +128,15 @@
 
   /**
    * Test that generator obeys the property "generate.max.per.host".
-   * @throws Exception 
+   * 
+   * @throws Exception
    */
-  public void testGenerateHostLimit() throws Exception{
+  public void testGenerateHostLimit() throws Exception {
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
-    list.add(createURLCrawlDatum("http://www.example.com/index1.html",
-        1, 1));
-    list.add(createURLCrawlDatum("http://www.example.com/index2.html",
-        1, 1));
-    list.add(createURLCrawlDatum("http://www.example.com/index3.html",
-        1, 1));
+    list.add(createURLCrawlDatum("http://www.example.com/index1.html", 1, 1));
+    list.add(createURLCrawlDatum("http://www.example.com/index2.html", 1, 1));
+    list.add(createURLCrawlDatum("http://www.example.com/index3.html", 1, 1));
 
     createCrawlDB(list);
 
@@ -186,9 +183,10 @@
   /**
    * Test that generator obeys the property "generator.max.count" and
    * "generator.count.per.domain".
-   * @throws Exception 
+   * 
+   * @throws Exception
    */
-  public void testGenerateDomainLimit() throws Exception{
+  public void testGenerateDomainLimit() throws Exception {
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
     list.add(createURLCrawlDatum("http://a.example.com/index.html", 1, 1));
@@ -199,7 +197,8 @@
 
     Configuration myConfiguration = new Configuration(conf);
     myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 1);
-    myConfiguration.set(Generator.GENERATOR_COUNT_MODE, Generator.GENERATOR_COUNT_VALUE_DOMAIN);
+    myConfiguration.set(Generator.GENERATOR_COUNT_MODE,
+        Generator.GENERATOR_COUNT_VALUE_DOMAIN);
 
     Path generatedSegment = generateFetchlist(Integer.MAX_VALUE,
         myConfiguration, false);
@@ -214,7 +213,8 @@
 
     myConfiguration = new Configuration(myConfiguration);
     myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 2);
-    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
+    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
+        false);
 
     fetchlistPath = new Path(new Path(generatedSegment,
         CrawlDatum.GENERATE_DIR_NAME), "part-00000");
@@ -240,10 +240,11 @@
 
   /**
    * Test generator obeys the filter setting.
-   * @throws Exception 
-   * @throws IOException 
+   * 
+   * @throws Exception
+   * @throws IOException
    */
-  public void testFilter() throws IOException, Exception{
+  public void testFilter() throws IOException, Exception {
 
     ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
 
@@ -261,7 +262,8 @@
 
     assertNull("should be null (0 entries)", generatedSegment);
 
-    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
+    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
+        false);
 
     Path fetchlistPath = new Path(new Path(generatedSegment,
         CrawlDatum.GENERATE_DIR_NAME), "part-00000");
@@ -273,14 +275,16 @@
 
   }
 
-
   /**
    * Read contents of fetchlist.
-   * @param fetchlist  path to Generated fetchlist
+   * 
+   * @param fetchlist
+   *          path to Generated fetchlist
    * @return Generated {@link URLCrawlDatum} objects
    * @throws IOException
    */
-  private ArrayList<URLCrawlDatum> readContents(Path fetchlist) throws IOException {
+  private ArrayList<URLCrawlDatum> readContents(Path fetchlist)
+      throws IOException {
     // verify results
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, fetchlist, conf);
 
@@ -301,8 +305,11 @@
 
   /**
    * Generate Fetchlist.
-   * @param numResults number of results to generate
-   * @param config Configuration to use
+   * 
+   * @param numResults
+   *          number of results to generate
+   * @param config
+   *          Configuration to use
    * @return path to generated segment
    * @throws IOException
    */
@@ -312,14 +319,16 @@
     Generator g = new Generator(config);
     Path[] generatedSegment = g.generate(dbDir, segmentsDir, -1, numResults,
         Long.MAX_VALUE, filter, false);
-    if (generatedSegment==null) return null;
+    if (generatedSegment == null)
+      return null;
     return generatedSegment[0];
   }
 
   /**
    * Creates CrawlDB.
-   *
-   * @param list database contents
+   * 
+   * @param list
+   *          database contents
    * @throws IOException
    * @throws Exception
    */
@@ -336,9 +345,13 @@
 
   /**
    * Constructs new {@link URLCrawlDatum} from submitted parameters.
-   * @param url url to use
-   * @param fetchInterval {@link CrawlDatum#setFetchInterval(float)}
-   * @param score {@link CrawlDatum#setScore(float)}
+   * 
+   * @param url
+   *          url to use
+   * @param fetchInterval
+   *          {@link CrawlDatum#setFetchInterval(float)}
+   * @param score
+   *          {@link CrawlDatum#setScore(float)}
    * @return Constructed object
    */
   private URLCrawlDatum createURLCrawlDatum(final String url,
Index: src/test/org/apache/nutch/crawl/TestSignatureFactory.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestSignatureFactory.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestSignatureFactory.java	(working copy)
@@ -24,9 +24,9 @@
 public class TestSignatureFactory extends TestCase {
 
   public void testGetSignature() {
-    Configuration conf=NutchConfiguration.create();
-    Signature signature1=SignatureFactory.getSignature(conf);
-    Signature signature2=SignatureFactory.getSignature(conf);
+    Configuration conf = NutchConfiguration.create();
+    Signature signature1 = SignatureFactory.getSignature(conf);
+    Signature signature2 = SignatureFactory.getSignature(conf);
     assertNotNull(signature1);
     assertNotNull(signature2);
     assertEquals(signature1, signature2);
Index: src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java
===================================================================
--- src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java	(working copy)
@@ -35,10 +35,10 @@
 import org.mortbay.jetty.handler.ContextHandler;
 import org.mortbay.jetty.handler.ResourceHandler;
 
-
 public class CrawlDBTestUtil {
 
-  private static final Logger LOG = LoggerFactory.getLogger(CrawlDBTestUtil.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDBTestUtil.class);
 
   /**
    * Creates synthetic crawldb
@@ -51,12 +51,12 @@
    *          urls to be inserted, objects are of type URLCrawlDatum
    * @throws Exception
    */
-  public static void createCrawlDb(Configuration conf, FileSystem fs, Path crawldb, List<URLCrawlDatum> init)
-      throws Exception {
+  public static void createCrawlDb(Configuration conf, FileSystem fs,
+      Path crawldb, List<URLCrawlDatum> init) throws Exception {
     LOG.trace("* creating crawldb: " + crawldb);
     Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
-    MapFile.Writer writer = new MapFile.Writer(conf, fs, new Path(dir, "part-00000")
-        .toString(), Text.class, CrawlDatum.class);
+    MapFile.Writer writer = new MapFile.Writer(conf, fs, new Path(dir,
+        "part-00000").toString(), Text.class, CrawlDatum.class);
     Iterator<URLCrawlDatum> it = init.iterator();
     while (it.hasNext()) {
       URLCrawlDatum row = it.next();
@@ -68,24 +68,24 @@
 
   /**
    * For now we need to manually construct our Configuration, because we need to
-   * override the default one and it is currently not possible to use dynamically
-   * set values.
+   * override the default one and it is currently not possible to use
+   * dynamically set values.
    * 
    * @return
    * @deprecated Use {@link #createConfiguration()} instead
    */
-  public static Configuration create(){
+  public static Configuration create() {
     return createConfiguration();
   }
 
   /**
    * For now we need to manually construct our Configuration, because we need to
-   * override the default one and it is currently not possible to use dynamically
-   * set values.
+   * override the default one and it is currently not possible to use
+   * dynamically set values.
    * 
    * @return
    */
-  public static Configuration createConfiguration(){
+  public static Configuration createConfiguration() {
     Configuration conf = new Configuration();
     conf.addResource("nutch-default.xml");
     conf.addResource("crawl-tests.xml");
@@ -103,34 +103,39 @@
       this.datum = datum;
     }
   }
-  
+
   /**
    * Generate seedlist
-   * @throws IOException 
+   * 
+   * @throws IOException
    */
-  public static void generateSeedList(FileSystem fs, Path urlPath, List<String> contents) throws IOException{
+  public static void generateSeedList(FileSystem fs, Path urlPath,
+      List<String> contents) throws IOException {
     FSDataOutputStream out;
-    Path file=new Path(urlPath,"urls.txt");
+    Path file = new Path(urlPath, "urls.txt");
     fs.mkdirs(urlPath);
-    out=fs.create(file);
-    Iterator<String> iterator=contents.iterator();
-    while(iterator.hasNext()){
-      String url=iterator.next();
+    out = fs.create(file);
+    Iterator<String> iterator = contents.iterator();
+    while (iterator.hasNext()) {
+      String url = iterator.next();
       out.writeBytes(url);
       out.writeBytes("\n");
     }
     out.flush();
     out.close();
   }
-  
+
   /**
    * Creates a new JettyServer with one static root context
    * 
-   * @param port port to listen to
-   * @param staticContent folder where static content lives
-   * @throws UnknownHostException 
+   * @param port
+   *          port to listen to
+   * @param staticContent
+   *          folder where static content lives
+   * @throws UnknownHostException
    */
-  public static Server getServer(int port, String staticContent) throws UnknownHostException{
+  public static Server getServer(int port, String staticContent)
+      throws UnknownHostException {
     Server webServer = new org.mortbay.jetty.Server();
     SocketConnector listener = new SocketConnector();
     listener.setPort(port);
Index: src/test/org/apache/nutch/crawl/TestInjector.java
===================================================================
--- src/test/org/apache/nutch/crawl/TestInjector.java	(revision 1188252)
+++ src/test/org/apache/nutch/crawl/TestInjector.java	(working copy)
@@ -30,12 +30,9 @@
 import junit.framework.TestCase;
 
 /**
- * Basic injector test:
- * 1. Creates a text file with urls
- * 2. Injects them into crawldb
- * 3. Reads crawldb entries and verifies contents
- * 4. Injects more urls into webdb
- * 5. Reads crawldb entries and verifies contents
+ * Basic injector test: 1. Creates a text file with urls 2. Injects them into
+ * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls
+ * into webdb 5. Reads crawldb entries and verifies contents
  * 
  * @author nutch-dev <nutch-dev at lucene.apache.org>
  */
@@ -43,80 +40,82 @@
 
   private Configuration conf;
   private FileSystem fs;
-  final static Path testdir=new Path("build/test/inject-test");
+  final static Path testdir = new Path("build/test/inject-test");
   Path crawldbPath;
   Path urlPath;
-  
+
   protected void setUp() throws Exception {
     conf = CrawlDBTestUtil.createConfiguration();
-    urlPath=new Path(testdir,"urls");
-    crawldbPath=new Path(testdir,"crawldb");
-    fs=FileSystem.get(conf);
-    if (fs.exists(urlPath)) fs.delete(urlPath, false);
-    if (fs.exists(crawldbPath)) fs.delete(crawldbPath, true);
+    urlPath = new Path(testdir, "urls");
+    crawldbPath = new Path(testdir, "crawldb");
+    fs = FileSystem.get(conf);
+    if (fs.exists(urlPath))
+      fs.delete(urlPath, false);
+    if (fs.exists(crawldbPath))
+      fs.delete(crawldbPath, true);
   }
-  
-  protected void tearDown() throws IOException{
+
+  protected void tearDown() throws IOException {
     fs.delete(testdir, true);
   }
 
   public void testInject() throws IOException {
-    ArrayList<String> urls=new ArrayList<String>();
-    for(int i=0;i<100;i++) {
+    ArrayList<String> urls = new ArrayList<String>();
+    for (int i = 0; i < 100; i++) {
       urls.add("http://zzz.com/" + i + ".html");
     }
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls);
-    
-    Injector injector=new Injector(conf);
+
+    Injector injector = new Injector(conf);
     injector.inject(crawldbPath, urlPath);
-    
+
     // verify results
-    List<String>read=readCrawldb();
-    
+    List<String> read = readCrawldb();
+
     Collections.sort(read);
     Collections.sort(urls);
 
     assertEquals(urls.size(), read.size());
-    
+
     assertTrue(read.containsAll(urls));
     assertTrue(urls.containsAll(read));
-    
-    //inject more urls
-    ArrayList<String> urls2=new ArrayList<String>();
-    for(int i=0;i<100;i++) {
+
+    // inject more urls
+    ArrayList<String> urls2 = new ArrayList<String>();
+    for (int i = 0; i < 100; i++) {
       urls2.add("http://xxx.com/" + i + ".html");
     }
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls2);
     injector.inject(crawldbPath, urlPath);
     urls.addAll(urls2);
-    
+
     // verify results
-    read=readCrawldb();
-    
+    read = readCrawldb();
 
     Collections.sort(read);
     Collections.sort(urls);
 
     assertEquals(urls.size(), read.size());
-    
+
     assertTrue(read.containsAll(urls));
     assertTrue(urls.containsAll(read));
-    
+
   }
-  
-  private List<String> readCrawldb() throws IOException{
-    Path dbfile=new Path(crawldbPath,CrawlDb.CURRENT_NAME + "/part-00000/data");
+
+  private List<String> readCrawldb() throws IOException {
+    Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME
+        + "/part-00000/data");
     System.out.println("reading:" + dbfile);
-    SequenceFile.Reader reader=new SequenceFile.Reader(fs, dbfile, conf);
-    ArrayList<String> read=new ArrayList<String>();
-    
-    READ:
-      do {
-      Text key=new Text();
-      CrawlDatum value=new CrawlDatum();
-      if(!reader.next(key, value)) break READ;
+    SequenceFile.Reader reader = new SequenceFile.Reader(fs, dbfile, conf);
+    ArrayList<String> read = new ArrayList<String>();
+
+    READ: do {
+      Text key = new Text();
+      CrawlDatum value = new CrawlDatum();
+      if (!reader.next(key, value))
+        break READ;
       read.add(key.toString());
-    } while(true);
+    } while (true);
 
     return read;
   }
Index: src/test/org/apache/nutch/parse/TestParseText.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParseText.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestParseText.java	(working copy)
@@ -24,15 +24,17 @@
 /** Unit tests for ParseText. */
 
 public class TestParseText extends TestCase {
-  public TestParseText(String name) { super(name); }
+  public TestParseText(String name) {
+    super(name);
+  }
 
   public void testParseText() throws Exception {
 
     String page = "Hello World The Quick Brown Fox Jumped Over the Lazy Fox";
 
     ParseText s = new ParseText(page);
-                        
+
     WritableTestUtils.testWritable(s);
   }
-	
+
 }
Index: src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
===================================================================
--- src/test/org/apache/nutch/parse/TestOutlinkExtractor.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestOutlinkExtractor.java	(working copy)
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
- 
+
 package org.apache.nutch.parse;
 
 import org.apache.nutch.parse.Outlink;
@@ -34,47 +34,57 @@
 public class TestOutlinkExtractor extends TestCase {
 
   private static Configuration conf = NutchConfiguration.create();
+
   public void testGetNoOutlinks() {
-    Outlink[]  outlinks = null;
-            
+    Outlink[] outlinks = null;
+
     outlinks = OutlinkExtractor.getOutlinks(null, conf);
     assertNotNull(outlinks);
     assertEquals(0, outlinks.length);
-    
+
     outlinks = OutlinkExtractor.getOutlinks("", conf);
     assertNotNull(outlinks);
     assertEquals(0, outlinks.length);
   }
-  
+
   public void testGetOutlinksHttp() {
-    Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with http://www.nutch.org/index.html is it found? " +
-        "What about www.google.com at http://www.google.de " +
-        "A longer URL could be http://www.sybit.com/solutions/portals.html", conf);
-    
+    Outlink[] outlinks = OutlinkExtractor
+        .getOutlinks(
+            "Test with http://www.nutch.org/index.html is it found? "
+                + "What about www.google.com at http://www.google.de "
+                + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+            conf);
+
     assertTrue("Url not found!", outlinks.length == 3);
-    assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+    assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+        outlinks[0].getToUrl());
     assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
-    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+        outlinks[2].getToUrl());
   }
-  
+
   public void testGetOutlinksHttp2() {
-    Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with http://www.nutch.org/index.html is it found? " +
-        "What about www.google.com at http://www.google.de " +
-        "A longer URL could be http://www.sybit.com/solutions/portals.html", "http://www.sybit.de", conf);
-    
+    Outlink[] outlinks = OutlinkExtractor
+        .getOutlinks(
+            "Test with http://www.nutch.org/index.html is it found? "
+                + "What about www.google.com at http://www.google.de "
+                + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+            "http://www.sybit.de", conf);
+
     assertTrue("Url not found!", outlinks.length == 3);
-    assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+    assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+        outlinks[0].getToUrl());
     assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
-    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+    assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+        outlinks[2].getToUrl());
   }
+
   public void testGetOutlinksFtp() {
     Outlink[] outlinks = OutlinkExtractor.getOutlinks(
-        "Test with ftp://www.nutch.org is it found? " +
-        "What about www.google.com at ftp://www.google.de", conf);
-    
-    assertTrue("Url not found!", outlinks.length >1);
+        "Test with ftp://www.nutch.org is it found? "
+            + "What about www.google.com at ftp://www.google.de", conf);
+
+    assertTrue("Url not found!", outlinks.length > 1);
     assertEquals("Wrong URL", "ftp://www.nutch.org", outlinks[0].getToUrl());
     assertEquals("Wrong URL", "ftp://www.google.de", outlinks[1].getToUrl());
   }
Index: src/test/org/apache/nutch/parse/TestParserFactory.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParserFactory.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestParserFactory.java	(working copy)
@@ -27,76 +27,80 @@
 
 /**
  * Unit test for new parse plugin selection.
- *
+ * 
  * @author Sebastien Le Callonnec
  * @version 1.0
  */
 public class TestParserFactory extends TestCase {
-	
+
   private Configuration conf;
   private ParserFactory parserFactory;
-    
-  public TestParserFactory(String name) { super(name); }
 
+  public TestParserFactory(String name) {
+    super(name);
+  }
+
   /** Inits the Test Case with the test parse-plugin file */
   protected void setUp() throws Exception {
-      conf = NutchConfiguration.create();
-      conf.set("plugin.includes", ".*");
-      conf.set("parse.plugin.file",
-               "org/apache/nutch/parse/parse-plugin-test.xml");
-      parserFactory = new ParserFactory(conf);
+    conf = NutchConfiguration.create();
+    conf.set("plugin.includes", ".*");
+    conf.set("parse.plugin.file",
+        "org/apache/nutch/parse/parse-plugin-test.xml");
+    parserFactory = new ParserFactory(conf);
   }
-    
+
   /** Unit test for <code>getExtensions(String)</code> method. */
   public void testGetExtensions() throws Exception {
-    Extension ext = (Extension)parserFactory.getExtensions("text/html").get(0);
+    Extension ext = (Extension) parserFactory.getExtensions("text/html").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
-    ext = (Extension) parserFactory.getExtensions("text/html; charset=ISO-8859-1").get(0);
+    ext = (Extension) parserFactory.getExtensions(
+        "text/html; charset=ISO-8859-1").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
-    ext = (Extension)parserFactory.getExtensions("foo/bar").get(0);
+    ext = (Extension) parserFactory.getExtensions("foo/bar").get(0);
     assertEquals("parse-tika", ext.getDescriptor().getPluginId());
   }
-  
+
   /** Unit test to check <code>getParsers</code> method */
   public void testGetParsers() throws Exception {
-    Parser [] parsers = parserFactory.getParsers("text/html", "http://foo.com");
+    Parser[] parsers = parserFactory.getParsers("text/html", "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
 
     parsers = parserFactory.getParsers("text/html; charset=ISO-8859-1",
-                                       "http://foo.com");
+        "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
+
     parsers = parserFactory.getParsers("application/x-javascript",
-                                       "http://foo.com");
+        "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.js.JSParseFilter",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.js.JSParseFilter", parsers[0]
+        .getClass().getName());
+
     parsers = parserFactory.getParsers("text/plain", "http://foo.com");
     assertNotNull(parsers);
     assertEquals(1, parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
-    
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
+
     Parser parser1 = parserFactory.getParsers("text/plain", "http://foo.com")[0];
     Parser parser2 = parserFactory.getParsers("*", "http://foo.com")[0];
-   
+
     assertEquals("Different instances!", parser1.hashCode(), parser2.hashCode());
-    
-    //test and make sure that the rss parser is loaded even though its plugin.xml
-    //doesn't claim to support text/rss, only application/rss+xml
-    parsers = parserFactory.getParsers("text/rss","http://foo.com");
+
+    // test and make sure that the rss parser is loaded even though its
+    // plugin.xml
+    // doesn't claim to support text/rss, only application/rss+xml
+    parsers = parserFactory.getParsers("text/rss", "http://foo.com");
     assertNotNull(parsers);
-    assertEquals(1,parsers.length);
-    assertEquals("org.apache.nutch.parse.tika.TikaParser",
-                 parsers[0].getClass().getName());
+    assertEquals(1, parsers.length);
+    assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+        .getClass().getName());
   }
- 
+
 }
Index: src/test/org/apache/nutch/parse/TestParseData.java
===================================================================
--- src/test/org/apache/nutch/parse/TestParseData.java	(revision 1188252)
+++ src/test/org/apache/nutch/parse/TestParseData.java	(working copy)
@@ -28,38 +28,37 @@
 /** Unit tests for ParseData. */
 
 public class TestParseData extends TestCase {
-    
+
   private Configuration conf = NutchConfiguration.create();
-  
-  public TestParseData(String name) { super(name); }
 
+  public TestParseData(String name) {
+    super(name);
+  }
+
   public void testParseData() throws Exception {
 
     String title = "The Foo Page";
 
-    Outlink[] outlinks = new Outlink[] {
-      new Outlink("http://foo.com/", "Foo"),
-      new Outlink("http://bar.com/", "Bar")
-    };
+    Outlink[] outlinks = new Outlink[] { new Outlink("http://foo.com/", "Foo"),
+        new Outlink("http://bar.com/", "Bar") };
 
     Metadata metaData = new Metadata();
     metaData.add("Language", "en/us");
     metaData.add("Charset", "UTF-8");
 
-    ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
-                        
+    ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks,
+        metaData);
+
     WritableTestUtils.testWritable(r, null);
   }
-	
+
   public void testMaxOutlinks() throws Exception {
     Outlink[] outlinks = new Outlink[128];
-    for (int i=0; i<outlinks.length; i++) {
+    for (int i = 0; i < outlinks.length; i++) {
       outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i);
     }
     ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS,
-                                       "Max Outlinks Title",
-                                       outlinks,
-                                       new Metadata());
+        "Max Outlinks Title", outlinks, new Metadata());
     ParseData data = (ParseData) WritableTestUtils.writeRead(original, null);
     assertEquals(outlinks.length, data.getOutlinks().length);
   }
Index: src/test/org/apache/nutch/util/TestSuffixStringMatcher.java
===================================================================
--- src/test/org/apache/nutch/util/TestSuffixStringMatcher.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestSuffixStringMatcher.java	(working copy)
@@ -21,101 +21,94 @@
 
 /** Unit tests for SuffixStringMatcher. */
 public class TestSuffixStringMatcher extends TestCase {
-  public TestSuffixStringMatcher(String name) { 
-    super(name); 
+  public TestSuffixStringMatcher(String name) {
+    super(name);
   }
 
-  private final static int NUM_TEST_ROUNDS= 20;
-  private final static int MAX_TEST_SUFFIXES= 100;
-  private final static int MAX_SUFFIX_LEN= 10;
-  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
-  private final static int MAX_INPUT_LEN= 20;
+  private final static int NUM_TEST_ROUNDS = 20;
+  private final static int MAX_TEST_SUFFIXES = 100;
+  private final static int MAX_SUFFIX_LEN = 10;
+  private final static int NUM_TEST_INPUTS_PER_ROUND = 100;
+  private final static int MAX_INPUT_LEN = 20;
 
-  private final static char[] alphabet= 
-    new char[] {
-      'a', 'b', 'c', 'd',
-//      'e', 'f', 'g', 'h', 'i', 'j',
-//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
-//      '5', '6', '7', '8', '9', '0'
-    };
+  private final static char[] alphabet = new char[] { 'a', 'b', 'c', 'd',
+  // 'e', 'f', 'g', 'h', 'i', 'j',
+  // 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+  // 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+  // '5', '6', '7', '8', '9', '0'
+  };
 
   private String makeRandString(int minLen, int maxLen) {
-    int len= minLen + (int) (Math.random() * (maxLen - minLen));
-    char[] chars= new char[len];
-    
-    for (int pos= 0; pos < len; pos++) {
-      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
+    int len = minLen + (int) (Math.random() * (maxLen - minLen));
+    char[] chars = new char[len];
+
+    for (int pos = 0; pos < len; pos++) {
+      chars[pos] = alphabet[(int) (Math.random() * alphabet.length)];
     }
-    
+
     return new String(chars);
   }
-  
+
   public void testSuffixMatcher() {
-    int numMatches= 0;
-    int numInputsTested= 0;
+    int numMatches = 0;
+    int numInputsTested = 0;
 
-    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {
+    for (int round = 0; round < NUM_TEST_ROUNDS; round++) {
 
       // build list of suffixes
-      int numSuffixes= (int) (Math.random() * MAX_TEST_SUFFIXES);
-      String[] suffixes= new String[numSuffixes];
-      for (int i= 0; i < numSuffixes; i++) {
-        suffixes[i]= makeRandString(0, MAX_SUFFIX_LEN);
+      int numSuffixes = (int) (Math.random() * MAX_TEST_SUFFIXES);
+      String[] suffixes = new String[numSuffixes];
+      for (int i = 0; i < numSuffixes; i++) {
+        suffixes[i] = makeRandString(0, MAX_SUFFIX_LEN);
       }
 
-      SuffixStringMatcher sufmatcher= new SuffixStringMatcher(suffixes);
+      SuffixStringMatcher sufmatcher = new SuffixStringMatcher(suffixes);
 
       // test random strings for suffix matches
-      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
-        String input= makeRandString(0, MAX_INPUT_LEN);
-        boolean matches= false;
-        int longestMatch= -1;
-        int shortestMatch= -1;
+      for (int i = 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
+        String input = makeRandString(0, MAX_INPUT_LEN);
+        boolean matches = false;
+        int longestMatch = -1;
+        int shortestMatch = -1;
 
-        for (int j= 0; j < suffixes.length; j++) {
+        for (int j = 0; j < suffixes.length; j++) {
 
-          if ((suffixes[j].length() > 0) 
-              && input.endsWith(suffixes[j])) {
+          if ((suffixes[j].length() > 0) && input.endsWith(suffixes[j])) {
 
-            matches= true;
-            int matchSize= suffixes[j].length();
+            matches = true;
+            int matchSize = suffixes[j].length();
 
-            if (matchSize > longestMatch) 
-              longestMatch= matchSize;
+            if (matchSize > longestMatch)
+              longestMatch = matchSize;
 
-            if ( (matchSize < shortestMatch)
-                 || (shortestMatch == -1) )
-              shortestMatch= matchSize;
+            if ((matchSize < shortestMatch) || (shortestMatch == -1))
+              shortestMatch = matchSize;
           }
 
         }
 
-        if (matches) 
+        if (matches)
           numMatches++;
 
         numInputsTested++;
 
-        assertTrue( "'" + input + "' should " + (matches ? "" : "not ") 
-                    + "match!",
-                    matches == sufmatcher.matches(input) );
+        assertTrue("'" + input + "' should " + (matches ? "" : "not ")
+            + "match!", matches == sufmatcher.matches(input));
         if (matches) {
-          assertTrue( shortestMatch 
-                      == sufmatcher.shortestMatch(input).length());
-          assertTrue( input.substring(input.length() - shortestMatch).equals(
-                        sufmatcher.shortestMatch(input)) );
+          assertTrue(shortestMatch == sufmatcher.shortestMatch(input).length());
+          assertTrue(input.substring(input.length() - shortestMatch).equals(
+              sufmatcher.shortestMatch(input)));
 
-          assertTrue( longestMatch 
-                      == sufmatcher.longestMatch(input).length());
-          assertTrue( input.substring(input.length() - longestMatch).equals(
-                        sufmatcher.longestMatch(input)) );
+          assertTrue(longestMatch == sufmatcher.longestMatch(input).length());
+          assertTrue(input.substring(input.length() - longestMatch).equals(
+              sufmatcher.longestMatch(input)));
 
         }
       }
     }
 
-    System.out.println("got " + numMatches + " matches out of " 
-                       + numInputsTested + " tests");
+    System.out.println("got " + numMatches + " matches out of "
+        + numInputsTested + " tests");
   }
 
 }
Index: src/test/org/apache/nutch/util/TestURLUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestURLUtil.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestURLUtil.java	(working copy)
@@ -22,17 +22,14 @@
 import junit.framework.TestCase;
 
 /** Test class for URLUtil */
-public class TestURLUtil
-  extends TestCase {
+public class TestURLUtil extends TestCase {
 
   @Override
-  protected void setUp()
-    throws Exception {
+  protected void setUp() throws Exception {
     super.setUp();
   }
 
-  public void testGetDomainName()
-    throws Exception {
+  public void testGetDomainName() throws Exception {
 
     URL url = null;
 
@@ -81,8 +78,7 @@
 
   }
 
-  public void testGetDomainSuffix()
-    throws Exception {
+  public void testGetDomainSuffix() throws Exception {
     URL url = null;
 
     url = new URL("http://lucene.apache.org/nutch");
@@ -133,8 +129,7 @@
 
   }
 
-  public void testGetHostSegments()
-    throws Exception {
+  public void testGetHostSegments() throws Exception {
     URL url;
     String[] segments;
 
@@ -165,9 +160,8 @@
 
   }
 
-  public void testChooseRepr()
-    throws Exception {
-    
+  public void testChooseRepr() throws Exception {
+
     String aDotCom = "http://www.a.com";
     String bDotCom = "http://www.b.com";
     String aSubDotCom = "http://www.news.a.com";
@@ -175,40 +169,41 @@
     String aPath = "http://www.a.com/xyz/index.html";
     String aPath2 = "http://www.a.com/abc/page.html";
     String aPath3 = "http://www.news.a.com/abc/page.html";
-    
+
     // 1) different domain then keep dest, temp or perm
     // a.com -> b.com*
     assertEquals(bDotCom, URLUtil.chooseRepr(aDotCom, bDotCom, true));
     assertEquals(bDotCom, URLUtil.chooseRepr(aDotCom, bDotCom, false));
-    
+
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aQStr, false));
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aPath, false));
-    
-    //3) permanent and not root and dest root, keep dest
-    //a.com/xyz/index.html -> a.com*
+
+    // 3) permanent and not root and dest root, keep dest
+    // a.com/xyz/index.html -> a.com*
     assertEquals(aDotCom, URLUtil.chooseRepr(aPath, aDotCom, false));
-    
-    //4) permanent and neither root keep dest
+
+    // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
     assertEquals(aPath2, URLUtil.chooseRepr(aPath, aPath2, false));
-    
-    //5) temp and root and dest not root keep src
-    //*a.com -> a.com/xyz/index.html
+
+    // 5) temp and root and dest not root keep src
+    // *a.com -> a.com/xyz/index.html
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aPath, true));
-    
-    //6) temp and not root and dest root keep dest
+
+    // 6) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
     assertEquals(aDotCom, URLUtil.chooseRepr(aPath, aDotCom, true));
 
-    //7) temp and neither root, keep shortest, if hosts equal by path else by hosts
-    //  a.com/xyz/index.html -> a.com/abc/page.html*
+    // 7) temp and neither root, keep shortest, if hosts equal by path else by
+    // hosts
+    // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
     assertEquals(aPath2, URLUtil.chooseRepr(aPath, aPath2, true));
     assertEquals(aPath, URLUtil.chooseRepr(aPath, aPath3, true));
 
-    //8) temp and both root keep shortest sub domain
+    // 8) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
     assertEquals(aDotCom, URLUtil.chooseRepr(aDotCom, aSubDotCom, true));
   }
@@ -216,30 +211,18 @@
   // from RFC3986 section 5.4.1
   private static String baseString = "http://a/b/c/d;p?q";
   private static String[][] targets = new String[][] {
-    // unknown protocol {"g:h"           ,  "g:h"},
-    {"g"             ,  "http://a/b/c/g"},
-    { "./g"           ,  "http://a/b/c/g"},
-    { "g/"            ,  "http://a/b/c/g/"},
-    { "/g"            ,  "http://a/g"},
-    { "//g"           ,  "http://g"},
-    { "?y"            ,  "http://a/b/c/d;p?y"},
-    { "g?y"           ,  "http://a/b/c/g?y"},
-    { "#s"            ,  "http://a/b/c/d;p?q#s"},
-    { "g#s"           ,  "http://a/b/c/g#s"},
-    { "g?y#s"         ,  "http://a/b/c/g?y#s"},
-    { ";x"            ,  "http://a/b/c/;x"},
-    { "g;x"           ,  "http://a/b/c/g;x"},
-    { "g;x?y#s"       ,  "http://a/b/c/g;x?y#s"},
-    { ""              ,  "http://a/b/c/d;p?q"},
-    { "."             ,  "http://a/b/c/"},
-    { "./"            ,  "http://a/b/c/"},
-    { ".."            ,  "http://a/b/"},
-    { "../"           ,  "http://a/b/"},
-    { "../g"          ,  "http://a/b/g"},
-    { "../.."         ,  "http://a/"},
-    { "../../"        ,  "http://a/"},
-    { "../../g"       ,  "http://a/g"}
-  };
+      // unknown protocol {"g:h" , "g:h"},
+      { "g", "http://a/b/c/g" }, { "./g", "http://a/b/c/g" },
+      { "g/", "http://a/b/c/g/" }, { "/g", "http://a/g" },
+      { "//g", "http://g" }, { "?y", "http://a/b/c/d;p?y" },
+      { "g?y", "http://a/b/c/g?y" }, { "#s", "http://a/b/c/d;p?q#s" },
+      { "g#s", "http://a/b/c/g#s" }, { "g?y#s", "http://a/b/c/g?y#s" },
+      { ";x", "http://a/b/c/;x" }, { "g;x", "http://a/b/c/g;x" },
+      { "g;x?y#s", "http://a/b/c/g;x?y#s" }, { "", "http://a/b/c/d;p?q" },
+      { ".", "http://a/b/c/" }, { "./", "http://a/b/c/" },
+      { "..", "http://a/b/" }, { "../", "http://a/b/" },
+      { "../g", "http://a/b/g" }, { "../..", "http://a/" },
+      { "../../", "http://a/" }, { "../../g", "http://a/g" } };
 
   public void testResolveURL() throws Exception {
     // test NUTCH-436
@@ -250,7 +233,8 @@
     // test NUTCH-566
     URL u566 = new URL("http://www.fleurie.org/entreprise.asp");
     abs = URLUtil.resolveURL(u566, "?id_entrep=111");
-    assertEquals("http://www.fleurie.org/entreprise.asp?id_entrep=111", abs.toString());
+    assertEquals("http://www.fleurie.org/entreprise.asp?id_entrep=111",
+        abs.toString());
     URL base = new URL(baseString);
     assertEquals("base url parsing", baseString, base.toString());
     for (int i = 0; i < targets.length; i++) {
Index: src/test/org/apache/nutch/util/TestStringUtil.java
===================================================================
--- src/test/org/apache/nutch/util/TestStringUtil.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestStringUtil.java	(working copy)
@@ -21,41 +21,41 @@
 
 /** Unit tests for StringUtil methods. */
 public class TestStringUtil extends TestCase {
-  public TestStringUtil(String name) { 
-    super(name); 
+  public TestStringUtil(String name) {
+    super(name);
   }
 
   public void testRightPad() {
-    String s= "my string";
+    String s = "my string";
 
-    String ps= StringUtil.rightPad(s, 0);
+    String ps = StringUtil.rightPad(s, 0);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.rightPad(s, 9);
+    ps = StringUtil.rightPad(s, 9);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.rightPad(s, 10);
-    assertTrue( (s+" ").equals(ps) );
+    ps = StringUtil.rightPad(s, 10);
+    assertTrue((s + " ").equals(ps));
 
-    ps= StringUtil.rightPad(s, 15);
-    assertTrue( (s+"      ").equals(ps) );
+    ps = StringUtil.rightPad(s, 15);
+    assertTrue((s + "      ").equals(ps));
 
   }
 
   public void testLeftPad() {
-    String s= "my string";
+    String s = "my string";
 
-    String ps= StringUtil.leftPad(s, 0);
+    String ps = StringUtil.leftPad(s, 0);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.leftPad(s, 9);
+    ps = StringUtil.leftPad(s, 9);
     assertTrue(s.equals(ps));
 
-    ps= StringUtil.leftPad(s, 10);
-    assertTrue( (" "+s).equals(ps) );
+    ps = StringUtil.leftPad(s, 10);
+    assertTrue((" " + s).equals(ps));
 
-    ps= StringUtil.leftPad(s, 15);
-    assertTrue( ("      "+s).equals(ps) );
+    ps = StringUtil.leftPad(s, 15);
+    assertTrue(("      " + s).equals(ps));
 
   }
 
Index: src/test/org/apache/nutch/util/TestPrefixStringMatcher.java
===================================================================
--- src/test/org/apache/nutch/util/TestPrefixStringMatcher.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestPrefixStringMatcher.java	(working copy)
@@ -21,101 +21,94 @@
 
 /** Unit tests for PrefixStringMatcher. */
 public class TestPrefixStringMatcher extends TestCase {
-  public TestPrefixStringMatcher(String name) { 
-    super(name); 
+  public TestPrefixStringMatcher(String name) {
+    super(name);
   }
 
-  private final static int NUM_TEST_ROUNDS= 20;
-  private final static int MAX_TEST_PREFIXES= 100;
-  private final static int MAX_PREFIX_LEN= 10;
-  private final static int NUM_TEST_INPUTS_PER_ROUND= 100;
-  private final static int MAX_INPUT_LEN= 20;
+  private final static int NUM_TEST_ROUNDS = 20;
+  private final static int MAX_TEST_PREFIXES = 100;
+  private final static int MAX_PREFIX_LEN = 10;
+  private final static int NUM_TEST_INPUTS_PER_ROUND = 100;
+  private final static int MAX_INPUT_LEN = 20;
 
-  private final static char[] alphabet= 
-    new char[] {
-      'a', 'b', 'c', 'd',
-//      'e', 'f', 'g', 'h', 'i', 'j',
-//      'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
-//      'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
-//      '5', '6', '7', '8', '9', '0'
-    };
+  private final static char[] alphabet = new char[] { 'a', 'b', 'c', 'd',
+  // 'e', 'f', 'g', 'h', 'i', 'j',
+  // 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+  // 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+  // '5', '6', '7', '8', '9', '0'
+  };
 
   private String makeRandString(int minLen, int maxLen) {
-    int len= minLen + (int) (Math.random() * (maxLen - minLen));
-    char[] chars= new char[len];
-    
-    for (int pos= 0; pos < len; pos++) {
-      chars[pos]= alphabet[(int) (Math.random() * alphabet.length)];
+    int len = minLen + (int) (Math.random() * (maxLen - minLen));
+    char[] chars = new char[len];
+
+    for (int pos = 0; pos < len; pos++) {
+      chars[pos] = alphabet[(int) (Math.random() * alphabet.length)];
     }
-    
+
     return new String(chars);
   }
-  
+
   public void testPrefixMatcher() {
-    int numMatches= 0;
-    int numInputsTested= 0;
+    int numMatches = 0;
+    int numInputsTested = 0;
 
-    for (int round= 0; round < NUM_TEST_ROUNDS; round++) {
+    for (int round = 0; round < NUM_TEST_ROUNDS; round++) {
 
       // build list of prefixes
-      int numPrefixes= (int) (Math.random() * MAX_TEST_PREFIXES);
-      String[] prefixes= new String[numPrefixes];
-      for (int i= 0; i < numPrefixes; i++) {
-        prefixes[i]= makeRandString(0, MAX_PREFIX_LEN);
+      int numPrefixes = (int) (Math.random() * MAX_TEST_PREFIXES);
+      String[] prefixes = new String[numPrefixes];
+      for (int i = 0; i < numPrefixes; i++) {
+        prefixes[i] = makeRandString(0, MAX_PREFIX_LEN);
       }
 
-      PrefixStringMatcher prematcher= new PrefixStringMatcher(prefixes);
+      PrefixStringMatcher prematcher = new PrefixStringMatcher(prefixes);
 
       // test random strings for prefix matches
-      for (int i= 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
-        String input= makeRandString(0, MAX_INPUT_LEN);
-        boolean matches= false;
-        int longestMatch= -1;
-        int shortestMatch= -1;
+      for (int i = 0; i < NUM_TEST_INPUTS_PER_ROUND; i++) {
+        String input = makeRandString(0, MAX_INPUT_LEN);
+        boolean matches = false;
+        int longestMatch = -1;
+        int shortestMatch = -1;
 
-        for (int j= 0; j < prefixes.length; j++) {
+        for (int j = 0; j < prefixes.length; j++) {
 
-          if ((prefixes[j].length() > 0) 
-              && input.startsWith(prefixes[j])) {
+          if ((prefixes[j].length() > 0) && input.startsWith(prefixes[j])) {
 
-            matches= true;
-            int matchSize= prefixes[j].length();
+            matches = true;
+            int matchSize = prefixes[j].length();
 
-            if (matchSize > longestMatch) 
-              longestMatch= matchSize;
+            if (matchSize > longestMatch)
+              longestMatch = matchSize;
 
-            if ( (matchSize < shortestMatch)
-                 || (shortestMatch == -1) )
-              shortestMatch= matchSize;
+            if ((matchSize < shortestMatch) || (shortestMatch == -1))
+              shortestMatch = matchSize;
           }
 
         }
 
-        if (matches) 
+        if (matches)
           numMatches++;
 
         numInputsTested++;
 
-        assertTrue( "'" + input + "' should " + (matches ? "" : "not ") 
-                    + "match!",
-                    matches == prematcher.matches(input) );
+        assertTrue("'" + input + "' should " + (matches ? "" : "not ")
+            + "match!", matches == prematcher.matches(input));
         if (matches) {
-          assertTrue( shortestMatch 
-                      == prematcher.shortestMatch(input).length());
-          assertTrue( input.substring(0, shortestMatch).equals(
-                        prematcher.shortestMatch(input)) );
+          assertTrue(shortestMatch == prematcher.shortestMatch(input).length());
+          assertTrue(input.substring(0, shortestMatch).equals(
+              prematcher.shortestMatch(input)));
 
-          assertTrue( longestMatch 
-                      == prematcher.longestMatch(input).length());
-          assertTrue( input.substring(0, longestMatch).equals(
-                        prematcher.longestMatch(input)) );
+          assertTrue(longestMatch == prematcher.longestMatch(input).length());
+          assertTrue(input.substring(0, longestMatch).equals(
+              prematcher.longestMatch(input)));
 
         }
       }
     }
 
-    System.out.println("got " + numMatches + " matches out of " 
-                       + numInputsTested + " tests");
+    System.out.println("got " + numMatches + " matches out of "
+        + numInputsTested + " tests");
   }
 
 }
Index: src/test/org/apache/nutch/util/TestGZIPUtils.java
===================================================================
--- src/test/org/apache/nutch/util/TestGZIPUtils.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestGZIPUtils.java	(working copy)
@@ -23,223 +23,216 @@
 
 /** Unit tests for GZIPUtils methods. */
 public class TestGZIPUtils extends TestCase {
-  public TestGZIPUtils(String name) { 
-    super(name); 
+  public TestGZIPUtils(String name) {
+    super(name);
   }
 
   /* a short, highly compressable, string */
-  String SHORT_TEST_STRING= 
-    "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
+  String SHORT_TEST_STRING = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
 
   /* a short, highly compressable, string */
-  String LONGER_TEST_STRING= 
-    SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 
-    + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING;
+  String LONGER_TEST_STRING = SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+      + SHORT_TEST_STRING;
 
   /* a snapshot of the nutch webpage */
-  String WEBPAGE= 
-  "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
-  + "<html>\n"
-  + "<head>\n"
-  + "  <meta http-equiv=\"content-type\"\n"
-  + " content=\"text/html; charset=ISO-8859-1\">\n"
-  + "  <title>Nutch</title>\n"
-  + "</head>\n"
-  + "<body>\n"
-  + "<h1\n"
-  + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
-  + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
-  + "<small>an open source web-search engine</small></h1>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
-  + "<table\n"
-  + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
-  + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
-  + "  <tbody>\n"
-  + "    <tr>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"tutorial.html\">Tutorial</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"api/index.html\">Javadoc</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
-  + "      </td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
-  + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
-  + " href=\"policies.html\">Policies</a><br>\n"
-  + "      </td>\n"
-  + "    </tr>\n"
-  + "  </tbody>\n"
-  + "</table>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
-  + "<h2>Introduction</h2>\n"
-  + "Nutch is a nascent effort to implement an open-source web search\n"
-  + "engine. Web search is a basic requirement for internet navigation, yet\n"
-  + "the number of web search engines is decreasing. Today's oligopoly could\n"
-  + "soon be a monopoly, with a single company controlling nearly all web\n"
-  + "search for its commercial gain. &nbsp;That would not be good for the\n"
-  + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
-  + "cost-effectively deploy a world-class web search engine.<br>\n"
-  + "<br>\n"
-  + "To succeed, the Nutch software must be able to:<br>\n"
-  + "<ul>\n"
-  + "  <li> crawl several billion pages per month</li>\n"
-  + "  <li>maintain an index of these pages</li>\n"
-  + "  <li>search that index up to 1000 times per second</li>\n"
-  + "  <li>provide very high quality search results</li>\n"
-  + "  <li>operate at minimal cost</li>\n"
-  + "</ul>\n"
-  + "<h2>Status</h2>\n"
-  + "Currently we're just a handful of developers working part-time to put\n"
-  + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
-  + "persistent data is written in well-documented formats so that modules\n"
-  + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
-  + "project progresses.<br>\n"
-  + "<br>\n"
-  + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
-  + " href=\"http://sourceforge.net\"> </a>\n"
-  + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
-  + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
-  + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
-  + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
-  + "</body>\n"
-  + "</html>\n";
+  String WEBPAGE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
+      + "<html>\n"
+      + "<head>\n"
+      + "  <meta http-equiv=\"content-type\"\n"
+      + " content=\"text/html; charset=ISO-8859-1\">\n"
+      + "  <title>Nutch</title>\n"
+      + "</head>\n"
+      + "<body>\n"
+      + "<h1\n"
+      + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
+      + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
+      + "<small>an open source web-search engine</small></h1>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+      + "<table\n"
+      + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
+      + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
+      + "  <tbody>\n"
+      + "    <tr>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"tutorial.html\">Tutorial</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"api/index.html\">Javadoc</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
+      + "      </td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
+      + "      <td style=\"vertical-align: top; text-align: center;\"><a\n"
+      + " href=\"policies.html\">Policies</a><br>\n"
+      + "      </td>\n"
+      + "    </tr>\n"
+      + "  </tbody>\n"
+      + "</table>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+      + "<h2>Introduction</h2>\n"
+      + "Nutch is a nascent effort to implement an open-source web search\n"
+      + "engine. Web search is a basic requirement for internet navigation, yet\n"
+      + "the number of web search engines is decreasing. Today's oligopoly could\n"
+      + "soon be a monopoly, with a single company controlling nearly all web\n"
+      + "search for its commercial gain. &nbsp;That would not be good for the\n"
+      + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
+      + "cost-effectively deploy a world-class web search engine.<br>\n"
+      + "<br>\n"
+      + "To succeed, the Nutch software must be able to:<br>\n"
+      + "<ul>\n"
+      + "  <li> crawl several billion pages per month</li>\n"
+      + "  <li>maintain an index of these pages</li>\n"
+      + "  <li>search that index up to 1000 times per second</li>\n"
+      + "  <li>provide very high quality search results</li>\n"
+      + "  <li>operate at minimal cost</li>\n"
+      + "</ul>\n"
+      + "<h2>Status</h2>\n"
+      + "Currently we're just a handful of developers working part-time to put\n"
+      + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
+      + "persistent data is written in well-documented formats so that modules\n"
+      + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
+      + "project progresses.<br>\n"
+      + "<br>\n"
+      + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
+      + " href=\"http://sourceforge.net\"> </a>\n"
+      + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
+      + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
+      + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
+      + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
+      + "</body>\n"
+      + "</html>\n";
 
   // tests
 
   public void testZipUnzip() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testZipUnzip(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testZipUnzip(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testZipUnzip(testBytes);
   }
 
   public void testZipUnzipBestEffort() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testZipUnzipBestEffort(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testZipUnzipBestEffort(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testZipUnzipBestEffort(testBytes);
   }
-  
+
   public void testTruncation() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testTruncation(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testTruncation(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testTruncation(testBytes);
   }
 
   public void testLimit() {
-    byte[] testBytes= SHORT_TEST_STRING.getBytes();
+    byte[] testBytes = SHORT_TEST_STRING.getBytes();
     testLimit(testBytes);
-    testBytes= LONGER_TEST_STRING.getBytes();
+    testBytes = LONGER_TEST_STRING.getBytes();
     testLimit(testBytes);
-    testBytes= WEBPAGE.getBytes();
+    testBytes = WEBPAGE.getBytes();
     testLimit(testBytes);
   }
 
   // helpers
 
   public void testZipUnzip(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-	       compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    byte[] uncompressedBytes= null;
+    byte[] uncompressedBytes = null;
     try {
-      uncompressedBytes= GZIPUtils.unzip(compressedBytes);
+      uncompressedBytes = GZIPUtils.unzip(compressedBytes);
     } catch (IOException e) {
       e.printStackTrace();
-      assertTrue("caught exception '" + e + "' during unzip()",
-		 false);
+      assertTrue("caught exception '" + e + "' during unzip()", false);
     }
-    assertTrue("uncompressedBytes is wrong size", 
-	       uncompressedBytes.length == origBytes.length);
+    assertTrue("uncompressedBytes is wrong size",
+        uncompressedBytes.length == origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) 
+    for (int i = 0; i < origBytes.length; i++)
       if (origBytes[i] != uncompressedBytes[i])
-	assertTrue("uncompressedBytes does not match origBytes", false);
+        assertTrue("uncompressedBytes does not match origBytes", false);
   }
 
   public void testZipUnzipBestEffort(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-	       compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes);
-    assertTrue("uncompressedBytes is wrong size", 
-	       uncompressedBytes.length == origBytes.length);
+    byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes);
+    assertTrue("uncompressedBytes is wrong size",
+        uncompressedBytes.length == origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) 
+    for (int i = 0; i < origBytes.length; i++)
       if (origBytes[i] != uncompressedBytes[i])
-	assertTrue("uncompressedBytes does not match origBytes", false);
+        assertTrue("uncompressedBytes does not match origBytes", false);
   }
 
   public void testTruncation(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     System.out.println("original data has len " + origBytes.length);
-    System.out.println("compressed data has len " 
-		       + compressedBytes.length);
+    System.out.println("compressed data has len " + compressedBytes.length);
 
-    for (int i= compressedBytes.length; i >= 0; i--) {
+    for (int i = compressedBytes.length; i >= 0; i--) {
 
-      byte[] truncCompressed= new byte[i];
+      byte[] truncCompressed = new byte[i];
 
-      for (int j= 0; j < i; j++)
-	truncCompressed[j]= compressedBytes[j];
+      for (int j = 0; j < i; j++)
+        truncCompressed[j] = compressedBytes[j];
 
-      byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed);
+      byte[] trunc = GZIPUtils.unzipBestEffort(truncCompressed);
 
       if (trunc == null) {
-	System.out.println("truncated to len "
-			   + i + ", trunc is null");
+        System.out.println("truncated to len " + i + ", trunc is null");
       } else {
-	System.out.println("truncated to len "
-			   + i + ", trunc.length=  " 
-			   + trunc.length);
+        System.out.println("truncated to len " + i + ", trunc.length=  "
+            + trunc.length);
 
-	for (int j= 0; j < trunc.length; j++)
-	  if (trunc[j] != origBytes[j]) 
-	    assertTrue("truncated/uncompressed array differs at pos "
-		       + j + " (compressed data had been truncated to len "
-		       + i + ")", false);
+        for (int j = 0; j < trunc.length; j++)
+          if (trunc[j] != origBytes[j])
+            assertTrue("truncated/uncompressed array differs at pos " + j
+                + " (compressed data had been truncated to len " + i + ")",
+                false);
       }
     }
   }
 
   public void testLimit(byte[] origBytes) {
-    byte[] compressedBytes= GZIPUtils.zip(origBytes);
+    byte[] compressedBytes = GZIPUtils.zip(origBytes);
 
     assertTrue("compressed array is not smaller!",
-               compressedBytes.length < origBytes.length);
+        compressedBytes.length < origBytes.length);
 
-    for (int i= 0; i < origBytes.length; i++) {
+    for (int i = 0; i < origBytes.length; i++) {
 
-      byte[] uncompressedBytes= 
-        GZIPUtils.unzipBestEffort(compressedBytes, i);
+      byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes, i);
 
-      assertTrue("uncompressedBytes is wrong size", 
-                 uncompressedBytes.length == i);
+      assertTrue("uncompressedBytes is wrong size",
+          uncompressedBytes.length == i);
 
-      for (int j= 0; j < i; j++) 
+      for (int j = 0; j < i; j++)
         if (origBytes[j] != uncompressedBytes[j])
           assertTrue("uncompressedBytes does not match origBytes", false);
     }
Index: src/test/org/apache/nutch/util/WritableTestUtils.java
===================================================================
--- src/test/org/apache/nutch/util/WritableTestUtils.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/WritableTestUtils.java	(working copy)
@@ -35,23 +35,22 @@
     TestCase.assertEquals(before, writeRead(before, conf));
   }
 
-  
   /** Utility method for testing writables. */
   public static Writable writeRead(Writable before, Configuration conf)
-    throws Exception {
-    
+      throws Exception {
+
     DataOutputBuffer dob = new DataOutputBuffer();
     before.write(dob);
-    
+
     DataInputBuffer dib = new DataInputBuffer();
     dib.reset(dob.getData(), dob.getLength());
-    
-    Writable after = (Writable)before.getClass().newInstance();
+
+    Writable after = (Writable) before.getClass().newInstance();
     if (conf != null) {
-      ((Configurable)after).setConf(conf);
+      ((Configurable) after).setConf(conf);
     }
     after.readFields(dib);
     return after;
   }
-  
+
 }
Index: src/test/org/apache/nutch/util/TestNodeWalker.java
===================================================================
--- src/test/org/apache/nutch/util/TestNodeWalker.java	(revision 1188252)
+++ src/test/org/apache/nutch/util/TestNodeWalker.java	(working copy)
@@ -24,49 +24,45 @@
 import org.w3c.dom.Node;
 import org.xml.sax.InputSource;
 
-
-
-
 /** Unit tests for NodeWalker methods. */
 public class TestNodeWalker extends TestCase {
-  public TestNodeWalker(String name) { 
-    super(name); 
+  public TestNodeWalker(String name) {
+    super(name);
   }
 
   /* a snapshot of the nutch webpage */
-  private final static String WEBPAGE= 
-  "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
-  + "<body>"
-  + "<ul>"
-  + "<li>crawl several billion pages per month</li>"
-  + "<li>maintain an index of these pages</li>"
-  + "<li>search that index up to 1000 times per second</li>"
-  + "<li>provide very high quality search results</li>"
-  + "<li>operate at minimal cost</li>"
-  + "</ul>"
-  + "</body>"
-  + "</html>";
+  private final static String WEBPAGE = "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
+      + "<body>"
+      + "<ul>"
+      + "<li>crawl several billion pages per month</li>"
+      + "<li>maintain an index of these pages</li>"
+      + "<li>search that index up to 1000 times per second</li>"
+      + "<li>provide very high quality search results</li>"
+      + "<li>operate at minimal cost</li>" + "</ul>" + "</body>" + "</html>";
 
   private final static String[] ULCONTENT = new String[4];
-  
-  protected void setUp() throws Exception{
-    ULCONTENT[0]="crawl several billion pages per month" ;
-    ULCONTENT[1]="maintain an index of these pages" ;
-    ULCONTENT[2]="search that index up to 1000 times per second"  ;
-    ULCONTENT[3]="operate at minimal cost" ;
+
+  protected void setUp() throws Exception {
+    ULCONTENT[0] = "crawl several billion pages per month";
+    ULCONTENT[1] = "maintain an index of these pages";
+    ULCONTENT[2] = "search that index up to 1000 times per second";
+    ULCONTENT[3] = "operate at minimal cost";
   }
 
   public void testSkipChildren() {
-    DOMParser parser= new DOMParser();
-    
+    DOMParser parser = new DOMParser();
+
     try {
       parser.setFeature("http://xml.org/sax/features/validation", false);
-      parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
-      parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
+      parser.setFeature(
+          "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+          false);
+      parser
+          .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
     } catch (Exception e) {
       e.printStackTrace();
     }
-     
+
     StringBuffer sb = new StringBuffer();
     NodeWalker walker = new NodeWalker(parser.getDocument());
     while (walker.hasNext()) {
@@ -78,30 +74,33 @@
         sb.append(text);
       }
     }
-   assertTrue("UL Content can NOT be found in the node", findSomeUlContent(sb.toString()));
-     
-   StringBuffer sbSkip = new StringBuffer();
-   NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
-   while (walkerSkip.hasNext()) {
-     Node currentNode = walkerSkip.nextNode();
-     String nodeName = currentNode.getNodeName();
-     short nodeType = currentNode.getNodeType();
-     if ("ul".equalsIgnoreCase(nodeName)) {
-       walkerSkip.skipChildren();
-     }
-     if (nodeType == Node.TEXT_NODE) {
-       String text = currentNode.getNodeValue();
-       text = text.replaceAll("\\s+", " ");
-       sbSkip.append(text);
-     }
-   }
-   assertFalse("UL Content can be found in the node", findSomeUlContent(sbSkip.toString()));
+    assertTrue("UL Content can NOT be found in the node",
+        findSomeUlContent(sb.toString()));
+
+    StringBuffer sbSkip = new StringBuffer();
+    NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
+    while (walkerSkip.hasNext()) {
+      Node currentNode = walkerSkip.nextNode();
+      String nodeName = currentNode.getNodeName();
+      short nodeType = currentNode.getNodeType();
+      if ("ul".equalsIgnoreCase(nodeName)) {
+        walkerSkip.skipChildren();
+      }
+      if (nodeType == Node.TEXT_NODE) {
+        String text = currentNode.getNodeValue();
+        text = text.replaceAll("\\s+", " ");
+        sbSkip.append(text);
+      }
+    }
+    assertFalse("UL Content can be found in the node",
+        findSomeUlContent(sbSkip.toString()));
   }
-  
+
   public boolean findSomeUlContent(String str) {
-    for(int i=0; i<ULCONTENT.length ; i++){
-      if(str.contains(ULCONTENT[i])) return true;
-    }    
+    for (int i = 0; i < ULCONTENT.length; i++) {
+      if (str.contains(ULCONTENT[i]))
+        return true;
+    }
     return false;
   }
 }
Index: src/test/org/apache/nutch/indexer/TestIndexingFilters.java
===================================================================
--- src/test/org/apache/nutch/indexer/TestIndexingFilters.java	(revision 1188252)
+++ src/test/org/apache/nutch/indexer/TestIndexingFilters.java	(working copy)
@@ -33,6 +33,7 @@
 
   /**
    * Test behaviour when defined filter does not exist.
+   * 
    * @throws IndexingException
    */
   public void testNonExistingIndexingFilter() throws IndexingException {
Index: src/test/org/apache/nutch/plugin/TestPluginSystem.java
===================================================================
--- src/test/org/apache/nutch/plugin/TestPluginSystem.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/TestPluginSystem.java	(working copy)
@@ -39,262 +39,256 @@
  * @author joa23
  */
 public class TestPluginSystem extends TestCase {
-    private int fPluginCount;
+  private int fPluginCount;
 
-    private LinkedList fFolders = new LinkedList();
-    private Configuration conf ;
-    private PluginRepository repository;
+  private LinkedList fFolders = new LinkedList();
+  private Configuration conf;
+  private PluginRepository repository;
 
-    protected void setUp() throws Exception {
-        this.conf = NutchConfiguration.create();
-        conf.set("plugin.includes", ".*");
-//        String string = this.conf.get("plugin.includes", "");
-//        conf.set("plugin.includes", string + "|Dummy*");
-        fPluginCount = 5;
-        createDummyPlugins(fPluginCount);
-        this.repository = PluginRepository.get(conf);
-    }
+  protected void setUp() throws Exception {
+    this.conf = NutchConfiguration.create();
+    conf.set("plugin.includes", ".*");
+    // String string = this.conf.get("plugin.includes", "");
+    // conf.set("plugin.includes", string + "|Dummy*");
+    fPluginCount = 5;
+    createDummyPlugins(fPluginCount);
+    this.repository = PluginRepository.get(conf);
+  }
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see junit.framework.TestCase#tearDown()
-     */
-    protected void tearDown() throws Exception {
-        for (int i = 0; i < fFolders.size(); i++) {
-            File folder = (File) fFolders.get(i);
-            delete(folder);
-            folder.delete();
-        }
-
+  /*
+   * (non-Javadoc)
+   * 
+   * @see junit.framework.TestCase#tearDown()
+   */
+  protected void tearDown() throws Exception {
+    for (int i = 0; i < fFolders.size(); i++) {
+      File folder = (File) fFolders.get(i);
+      delete(folder);
+      folder.delete();
     }
 
-    /**
+  }
+
+  /**
      */
-    public void testPluginConfiguration() {
-        String string = getPluginFolder();
-        File file = new File(string);
-        if (!file.exists()) {
-            file.mkdir();
-        }
-        assertTrue(file.exists());
+  public void testPluginConfiguration() {
+    String string = getPluginFolder();
+    File file = new File(string);
+    if (!file.exists()) {
+      file.mkdir();
     }
+    assertTrue(file.exists());
+  }
 
-    /**
+  /**
      */
-    public void testLoadPlugins() {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        int k = descriptors.length;
-        assertTrue(fPluginCount <= k);
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
-                continue;
-            }
-            assertEquals(1, descriptor.getExportedLibUrls().length);
-            assertEquals(1, descriptor.getNotExportedLibUrls().length);
-        }
+  public void testLoadPlugins() {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    int k = descriptors.length;
+    assertTrue(fPluginCount <= k);
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+        continue;
+      }
+      assertEquals(1, descriptor.getExportedLibUrls().length);
+      assertEquals(1, descriptor.getNotExportedLibUrls().length);
     }
+  }
 
-    public void testRepositoryCache() {
-      Configuration config = NutchConfiguration.create();
-      PluginRepository repo = PluginRepository.get(config);
-      JobConf job = new NutchJob(config);
-      PluginRepository repo1 = PluginRepository.get(job);
-      assertTrue(repo == repo1);
-      // now construct a config without UUID
-      config = new Configuration();
-      config.addResource("nutch-default.xml");
-      config.addResource("nutch-site.xml");
-      repo = PluginRepository.get(config);
-      job = new NutchJob(config);
-      repo1 = PluginRepository.get(job);
-      assertTrue(repo1 != repo);
-    }
+  public void testRepositoryCache() {
+    Configuration config = NutchConfiguration.create();
+    PluginRepository repo = PluginRepository.get(config);
+    JobConf job = new NutchJob(config);
+    PluginRepository repo1 = PluginRepository.get(job);
+    assertTrue(repo == repo1);
+    // now construct a config without UUID
+    config = new Configuration();
+    config.addResource("nutch-default.xml");
+    config.addResource("nutch-site.xml");
+    repo = PluginRepository.get(config);
+    job = new NutchJob(config);
+    repo1 = PluginRepository.get(job);
+    assertTrue(repo1 != repo);
+  }
 
-    /**
+  /**
      *  
      */
-    public void testGetExtensionAndAttributes() {
-        String xpId = " sdsdsd";
-        ExtensionPoint extensionPoint =repository
-                .getExtensionPoint(xpId);
-        assertEquals(extensionPoint, null);
-        Extension[] extension1 = repository
-                .getExtensionPoint(getGetExtensionId()).getExtensions();
-        assertEquals(extension1.length, fPluginCount);
-        for (int i = 0; i < extension1.length; i++) {
-            Extension extension2 = extension1[i];
-            String string = extension2.getAttribute(getGetConfigElementName());
-            assertEquals(string, getParameterValue());
-        }
+  public void testGetExtensionAndAttributes() {
+    String xpId = " sdsdsd";
+    ExtensionPoint extensionPoint = repository.getExtensionPoint(xpId);
+    assertEquals(extensionPoint, null);
+    Extension[] extension1 = repository.getExtensionPoint(getGetExtensionId())
+        .getExtensions();
+    assertEquals(extension1.length, fPluginCount);
+    for (int i = 0; i < extension1.length; i++) {
+      Extension extension2 = extension1[i];
+      String string = extension2.getAttribute(getGetConfigElementName());
+      assertEquals(string, getParameterValue());
     }
+  }
 
-    /**
-     * @throws PluginRuntimeException
-     */
-    public void testGetExtensionInstances() throws PluginRuntimeException {
-        Extension[] extensions = repository
-                .getExtensionPoint(getGetExtensionId()).getExtensions();
-        assertEquals(extensions.length, fPluginCount);
-        for (int i = 0; i < extensions.length; i++) {
-            Extension extension = extensions[i];
-            Object object = extension.getExtensionInstance();
-            if (!(object instanceof HelloWorldExtension))
-                fail(" object is not a instance of HelloWorldExtension");
-            ((ITestExtension) object).testGetExtension("Bla ");
-            String string = ((ITestExtension) object).testGetExtension("Hello");
-            assertEquals("Hello World", string);
-        }
+  /**
+   * @throws PluginRuntimeException
+   */
+  public void testGetExtensionInstances() throws PluginRuntimeException {
+    Extension[] extensions = repository.getExtensionPoint(getGetExtensionId())
+        .getExtensions();
+    assertEquals(extensions.length, fPluginCount);
+    for (int i = 0; i < extensions.length; i++) {
+      Extension extension = extensions[i];
+      Object object = extension.getExtensionInstance();
+      if (!(object instanceof HelloWorldExtension))
+        fail(" object is not an instance of HelloWorldExtension");
+      ((ITestExtension) object).testGetExtension("Bla ");
+      String string = ((ITestExtension) object).testGetExtension("Hello");
+      assertEquals("Hello World", string);
     }
+  }
 
-    /**
+  /**
      * 
      *  
      */
-    public void testGetClassLoader() {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            assertNotNull(descriptor.getClassLoader());
-        }
+  public void testGetClassLoader() {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      assertNotNull(descriptor.getClassLoader());
     }
+  }
 
-    /**
-     * @throws IOException
-     */
-    public void testGetResources() throws IOException {
-        PluginDescriptor[] descriptors = repository
-                .getPluginDescriptors();
-        for (int i = 0; i < descriptors.length; i++) {
-            PluginDescriptor descriptor = descriptors[i];
-            if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
-                continue;
-            }
-            String value = descriptor.getResourceString("key", Locale.UK);
-            assertEquals("value", value);
-            value = descriptor.getResourceString("key",
-                    Locale.TRADITIONAL_CHINESE);
-            assertEquals("value", value);
+  /**
+   * @throws IOException
+   */
+  public void testGetResources() throws IOException {
+    PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+    for (int i = 0; i < descriptors.length; i++) {
+      PluginDescriptor descriptor = descriptors[i];
+      if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+        continue;
+      }
+      String value = descriptor.getResourceString("key", Locale.UK);
+      assertEquals("value", value);
+      value = descriptor.getResourceString("key", Locale.TRADITIONAL_CHINESE);
+      assertEquals("value", value);
 
-        }
     }
+  }
 
-    /**
-     * @return a PluginFolderPath
-     */
-    private String getPluginFolder() {
-        String[] strings = conf.getStrings("plugin.folders");
-        if (strings == null || strings.length == 0)
-            fail("no plugin directory setuped..");
+  /**
+   * @return a PluginFolderPath
+   */
+  private String getPluginFolder() {
+    String[] strings = conf.getStrings("plugin.folders");
+    if (strings == null || strings.length == 0)
+      fail("no plugin directory set up");
 
-        String name = strings[0];
-        return new PluginManifestParser(conf, this.repository).getPluginFolder(name).toString();
-    }
+    String name = strings[0];
+    return new PluginManifestParser(conf, this.repository)
+        .getPluginFolder(name).toString();
+  }
 
-    /**
-     * Creates some Dummy Plugins
-     * 
-     * @param pCount
-     */
-    private void createDummyPlugins(int pCount) {
-        String string = getPluginFolder();
-        try {
-            File folder = new File(string);
-            folder.mkdir();
-            for (int i = 0; i < pCount; i++) {
-                String pluginFolder = string + File.separator + "DummyPlugin"
-                        + i;
-                File file = new File(pluginFolder);
-                file.mkdir();
-                fFolders.add(file);
-                createPluginManifest(i, file.getAbsolutePath());
-                createResourceFile(file.getAbsolutePath());
-            }
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
+  /**
+   * Creates some Dummy Plugins
+   * 
+   * @param pCount
+   */
+  private void createDummyPlugins(int pCount) {
+    String string = getPluginFolder();
+    try {
+      File folder = new File(string);
+      folder.mkdir();
+      for (int i = 0; i < pCount; i++) {
+        String pluginFolder = string + File.separator + "DummyPlugin" + i;
+        File file = new File(pluginFolder);
+        file.mkdir();
+        fFolders.add(file);
+        createPluginManifest(i, file.getAbsolutePath());
+        createResourceFile(file.getAbsolutePath());
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
     }
+  }
 
-    /**
-     * Creates an ResourceFile
-     * 
-     * @param pFolderPath
-     * @throws FileNotFoundException
-     * @throws IOException
-     */
-    private void createResourceFile(String pFolderPath)
-            throws FileNotFoundException, IOException {
-        Properties properties = new Properties();
-        properties.setProperty("key", "value");
-        properties.store(new FileOutputStream(pFolderPath + File.separator
-                + "messages" + ".properties"), "");
-    }
+  /**
+   * Creates a ResourceFile
+   * 
+   * @param pFolderPath
+   * @throws FileNotFoundException
+   * @throws IOException
+   */
+  private void createResourceFile(String pFolderPath)
+      throws FileNotFoundException, IOException {
+    Properties properties = new Properties();
+    properties.setProperty("key", "value");
+    properties.store(new FileOutputStream(pFolderPath + File.separator
+        + "messages" + ".properties"), "");
+  }
 
-    /**
-     * Deletes files in path
-     * 
-     * @param path
-     * @throws IOException
-     */
-    private void delete(File path) throws IOException {
-        File[] files = path.listFiles();
-        for (int i = 0; i < files.length; ++i) {
-            if (files[i].isDirectory())
-                delete(files[i]);
-            files[i].delete();
-        }
+  /**
+   * Deletes files in path
+   * 
+   * @param path
+   * @throws IOException
+   */
+  private void delete(File path) throws IOException {
+    File[] files = path.listFiles();
+    for (int i = 0; i < files.length; ++i) {
+      if (files[i].isDirectory())
+        delete(files[i]);
+      files[i].delete();
     }
+  }
 
-    /**
-     * Creates an Plugin Manifest File
-     * 
-     * @param i
-     * @param pFolderPath
-     * @throws IOException
-     */
-    private void createPluginManifest(int i, String pFolderPath)
-            throws IOException {
-        FileWriter out = new FileWriter(pFolderPath + File.separator
-                + "plugin.xml");
-        String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" 
-                + "<!--this is just a simple plugin for testing issues.-->"
-                + "<plugin id=\"org.apache.nutch.plugin."
-                + i
-                + "\" name=\""
-                + i
-                + "\" version=\"1.0\" provider-name=\"joa23\" "
-                + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
-                + "<extension-point id=\"aExtensioID\" "
-                + "name=\"simple Parser Extension\" "
-                + "schema=\"schema/testExtensionPoint.exsd\"/>"
-                + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
-                + "<library name=\"libs/not_exported.jar\"/></runtime>"
-                + "<extension point=\"aExtensioID\">"
-                + "<implementation name=\"simple Parser Extension\" "
-                + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
-                + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
-                + "</implementation></extension></plugin>";
-        out.write(xml);
-        out.flush();
-        out.close();
-    }
+  /**
+   * Creates a Plugin Manifest File
+   * 
+   * @param i
+   * @param pFolderPath
+   * @throws IOException
+   */
+  private void createPluginManifest(int i, String pFolderPath)
+      throws IOException {
+    FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml");
+    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+        + "<!--this is just a simple plugin for testing issues.-->"
+        + "<plugin id=\"org.apache.nutch.plugin."
+        + i
+        + "\" name=\""
+        + i
+        + "\" version=\"1.0\" provider-name=\"joa23\" "
+        + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
+        + "<extension-point id=\"aExtensioID\" "
+        + "name=\"simple Parser Extension\" "
+        + "schema=\"schema/testExtensionPoint.exsd\"/>"
+        + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
+        + "<library name=\"libs/not_exported.jar\"/></runtime>"
+        + "<extension point=\"aExtensioID\">"
+        + "<implementation name=\"simple Parser Extension\" "
+        + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
+        + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
+        + "</implementation></extension></plugin>";
+    out.write(xml);
+    out.flush();
+    out.close();
+  }
 
-    private String getParameterValue() {
-        return "a simple param value";
-    }
+  private String getParameterValue() {
+    return "a simple param value";
+  }
 
-    private static String getGetExtensionId() {
-        return "aExtensioID";
-    }
+  private static String getGetExtensionId() {
+    return "aExtensioID";
+  }
 
-    private static String getGetConfigElementName() {
-        return "dummy-name";
-    }
+  private static String getGetConfigElementName() {
+    return "dummy-name";
+  }
 
-    public static void main(String[] args) throws IOException {
-        new TestPluginSystem().createPluginManifest(1, "/");
-    }
+  public static void main(String[] args) throws IOException {
+    new TestPluginSystem().createPluginManifest(1, "/");
+  }
 }
Index: src/test/org/apache/nutch/plugin/ITestExtension.java
===================================================================
--- src/test/org/apache/nutch/plugin/ITestExtension.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/ITestExtension.java	(working copy)
@@ -15,11 +15,12 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * A Simple Test Extension Interface.
  * 
  * @author joa23
- *
+ * 
  */
 public interface ITestExtension {
   public String testGetExtension(String hello);
Index: src/test/org/apache/nutch/plugin/HelloWorldExtension.java
===================================================================
--- src/test/org/apache/nutch/plugin/HelloWorldExtension.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/HelloWorldExtension.java	(working copy)
@@ -24,8 +24,11 @@
  */
 public class HelloWorldExtension implements ITestExtension {
 
-  /* (non-Javadoc)
-   * @see org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
+  /*
+   * (non-Javadoc)
+   * 
+   * @see
+   * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
    */
   public String testGetExtension(String hello) {
     return hello + " World";
Index: src/test/org/apache/nutch/plugin/SimpleTestPlugin.java
===================================================================
--- src/test/org/apache/nutch/plugin/SimpleTestPlugin.java	(revision 1188252)
+++ src/test/org/apache/nutch/plugin/SimpleTestPlugin.java	(working copy)
@@ -28,8 +28,8 @@
 public class SimpleTestPlugin extends Plugin {
 
   /**
-   * @param pDescriptor 
-   * @param conf 
+   * @param pDescriptor
+   * @param conf
    */
   public SimpleTestPlugin(PluginDescriptor pDescriptor, Configuration conf) {
 
@@ -55,4 +55,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/fetcher/OldFetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/OldFetcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/OldFetcher.java	(working copy)
@@ -43,29 +43,29 @@
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.*;
 
-
 /** The fetcher. Most of the work is done by plugins. */
-public class OldFetcher extends Configured implements Tool, MapRunnable<WritableComparable, Writable, Text, NutchWritable> { 
+public class OldFetcher extends Configured implements Tool,
+    MapRunnable<WritableComparable, Writable, Text, NutchWritable> {
 
   public static final Logger LOG = LoggerFactory.getLogger(OldFetcher.class);
-  
+
   public static final int PERM_REFRESH_TIME = 5;
 
   public static final String CONTENT_REDIR = "content";
 
   public static final String PROTOCOL_REDIR = "protocol";
 
-  public static class InputFormat extends SequenceFileInputFormat<WritableComparable, Writable> {
+  public static class InputFormat extends
+      SequenceFileInputFormat<WritableComparable, Writable> {
     /** Don't split inputs, to keep things polite. */
-    public InputSplit[] getSplits(JobConf job, int nSplits)
-      throws IOException {
+    public InputSplit[] getSplits(JobConf job, int nSplits) throws IOException {
       FileStatus[] files = listStatus(job);
       FileSystem fs = FileSystem.get(job);
       InputSplit[] splits = new InputSplit[files.length];
       for (int i = 0; i < files.length; i++) {
         FileStatus cur = files[i];
-        splits[i] = new FileSplit(cur.getPath(), 0,
-            cur.getLen(), (String[])null);
+        splits[i] = new FileSplit(cur.getPath(), 0, cur.getLen(),
+            (String[]) null);
       }
       return splits;
     }
@@ -82,9 +82,9 @@
   private long start = System.currentTimeMillis(); // start time of fetcher run
   private long lastRequestStart = start;
 
-  private long bytes;                             // total bytes fetched
-  private int pages;                              // total pages fetched
-  private int errors;                             // total pages errored
+  private long bytes; // total bytes fetched
+  private int pages; // total pages fetched
+  private int errors; // total pages errored
 
   private boolean storingContent;
   private boolean parsing;
@@ -101,8 +101,8 @@
     private String reprUrl;
 
     public FetcherThread(Configuration conf) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread");              // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread"); // use an informative name
       this.conf = conf;
       this.urlFilters = new URLFilters(conf);
       this.scfilters = new ScoringFilters(conf);
@@ -112,27 +112,29 @@
     }
 
     public void run() {
-      synchronized (OldFetcher.this) {activeThreads++;} // count threads
-      
+      synchronized (OldFetcher.this) {
+        activeThreads++;
+      } // count threads
+
       try {
         Text key = new Text();
         CrawlDatum datum = new CrawlDatum();
-        
+
         while (true) {
           // TODO : NUTCH-258 ...
           // If something bad happened, then exit
           // if (conf.getBoolean("fetcher.exit", false)) {
-          //   break;
+          // break;
           // ]
-          
-          try {                                   // get next entry from input
+
+          try { // get next entry from input
             if (!input.next(key, datum)) {
-              break;                              // at eof, exit
+              break; // at eof, exit
             }
           } catch (IOException e) {
             if (LOG.isErrorEnabled()) {
               e.printStackTrace(LogUtil.getErrorStream(LOG));
-              LOG.error("fetcher caught:"+e.toString());
+              LOG.error("fetcher caught:" + e.toString());
             }
             break;
           }
@@ -144,8 +146,8 @@
           // url may be changed through redirects.
           Text url = new Text(key);
 
-          Text reprUrlWritable =
-            (Text) datum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
+          Text reprUrlWritable = (Text) datum.getMetaData().get(
+              Nutch.WRITABLE_REPR_URL_KEY);
           if (reprUrlWritable == null) {
             reprUrl = key.toString();
           } else {
@@ -153,7 +155,9 @@
           }
 
           try {
-            if (LOG.isInfoEnabled()) { LOG.info("fetching " + url); }
+            if (LOG.isInfoEnabled()) {
+              LOG.info("fetching " + url);
+            }
 
             // fetch the page
             redirectCount = 0;
@@ -162,7 +166,8 @@
                 LOG.debug("redirectCount=" + redirectCount);
               }
               redirecting = false;
-              Protocol protocol = this.protocolFactory.getProtocol(url.toString());
+              Protocol protocol = this.protocolFactory.getProtocol(url
+                  .toString());
               ProtocolOutput output = protocol.getProtocolOutput(url, datum);
               ProtocolStatus status = output.getStatus();
               Content content = output.getContent();
@@ -174,22 +179,22 @@
                     new Text(reprUrl));
               }
 
-              switch(status.getCode()) {
+              switch (status.getCode()) {
 
-              case ProtocolStatus.SUCCESS:        // got a page
-                pstatus = output(url, datum, content, status, CrawlDatum.STATUS_FETCH_SUCCESS);
+              case ProtocolStatus.SUCCESS: // got a page
+                pstatus = output(url, datum, content, status,
+                    CrawlDatum.STATUS_FETCH_SUCCESS);
                 updateStatus(content.getContent().length);
-                if (pstatus != null && pstatus.isSuccess() &&
-                        pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+                if (pstatus != null && pstatus.isSuccess()
+                    && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
                   String newUrl = pstatus.getMessage();
                   int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
                   url = handleRedirect(url, datum, urlString, newUrl,
-                                       refreshTime < PERM_REFRESH_TIME,
-                                       CONTENT_REDIR);
+                      refreshTime < PERM_REFRESH_TIME, CONTENT_REDIR);
                 }
                 break;
 
-              case ProtocolStatus.MOVED:         // redirect
+              case ProtocolStatus.MOVED: // redirect
               case ProtocolStatus.TEMP_MOVED:
                 int code;
                 boolean temp;
@@ -202,22 +207,22 @@
                 }
                 output(url, datum, content, status, code);
                 String newUrl = status.getMessage();
-                url = handleRedirect(url, datum, urlString, newUrl,
-                                     temp, PROTOCOL_REDIR);
+                url = handleRedirect(url, datum, urlString, newUrl, temp,
+                    PROTOCOL_REDIR);
                 break;
 
               // failures - increase the retry counter
               case ProtocolStatus.EXCEPTION:
                 logError(url, status.getMessage());
-              /* FALLTHROUGH */
-              case ProtocolStatus.RETRY:          // retry
+                /* FALLTHROUGH */
+              case ProtocolStatus.RETRY: // retry
               case ProtocolStatus.WOULDBLOCK:
               case ProtocolStatus.BLOCKED:
                 output(url, datum, null, status, CrawlDatum.STATUS_FETCH_RETRY);
                 break;
-                
+
               // permanent failures
-              case ProtocolStatus.GONE:           // gone
+              case ProtocolStatus.GONE: // gone
               case ProtocolStatus.NOTFOUND:
               case ProtocolStatus.ACCESS_DENIED:
               case ProtocolStatus.ROBOTS_DENIED:
@@ -225,9 +230,10 @@
                 break;
 
               case ProtocolStatus.NOTMODIFIED:
-                output(url, datum, null, status, CrawlDatum.STATUS_FETCH_NOTMODIFIED);
+                output(url, datum, null, status,
+                    CrawlDatum.STATUS_FETCH_NOTMODIFIED);
                 break;
-                
+
               default:
                 if (LOG.isWarnEnabled()) {
                   LOG.warn("Unknown ProtocolStatus: " + status.getCode());
@@ -244,28 +250,28 @@
 
             } while (redirecting && (redirectCount < maxRedirect));
 
-            
-          } catch (Throwable t) {                 // unexpected exception
+          } catch (Throwable t) { // unexpected exception
             logError(url, t.toString());
             output(url, datum, null, null, CrawlDatum.STATUS_FETCH_RETRY);
-            
+
           }
         }
 
       } catch (Throwable e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       } finally {
-        synchronized (OldFetcher.this) {activeThreads--;} // count threads
+        synchronized (OldFetcher.this) {
+          activeThreads--;
+        } // count threads
       }
     }
 
-    private Text handleRedirect(Text url, CrawlDatum datum,
-                                String urlString, String newUrl,
-                                boolean temp, String redirType)
-    throws MalformedURLException, URLFilterException {
+    private Text handleRedirect(Text url, CrawlDatum datum, String urlString,
+        String newUrl, boolean temp, String redirType)
+        throws MalformedURLException, URLFilterException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
       if (newUrl != null && !newUrl.equals(urlString)) {
@@ -275,8 +281,8 @@
           redirecting = true;
           redirectCount++;
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                      url + " (fetching now)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching now)");
           }
           return url;
         } else {
@@ -287,15 +293,15 @@
           }
           output(url, newDatum, null, null, CrawlDatum.STATUS_LINKED);
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                      url + " (fetching later)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching later)");
           }
           return null;
         }
       } else {
         if (LOG.isDebugEnabled()) {
-          LOG.debug(" - " + redirType + " redirect skipped: " +
-              (newUrl != null ? "to same url" : "filtered"));
+          LOG.debug(" - " + redirType + " redirect skipped: "
+              + (newUrl != null ? "to same url" : "filtered"));
         }
         return null;
       }
@@ -305,17 +311,18 @@
       if (LOG.isInfoEnabled()) {
         LOG.info("fetch of " + url + " failed with: " + message);
       }
-      synchronized (OldFetcher.this) {               // record failure
+      synchronized (OldFetcher.this) { // record failure
         errors++;
       }
     }
 
-    private ParseStatus output(Text key, CrawlDatum datum,
-                        Content content, ProtocolStatus pstatus, int status) {
+    private ParseStatus output(Text key, CrawlDatum datum, Content content,
+        ProtocolStatus pstatus, int status) {
 
       datum.setStatus(status);
       datum.setFetchTime(System.currentTimeMillis());
-      if (pstatus != null) datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
+      if (pstatus != null)
+        datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
 
       ParseResult parseResult = null;
       if (content != null) {
@@ -331,27 +338,31 @@
             LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
           }
         }
-        /* Note: Fetcher will only follow meta-redirects coming from the
-         * original URL. */ 
+        /*
+         * Note: Fetcher will only follow meta-redirects coming from the
+         * original URL.
+         */
         if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
           try {
             parseResult = this.parseUtil.parse(content);
           } catch (Exception e) {
-            LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException(e));
+            LOG.warn("Error parsing: " + key + ": "
+                + StringUtils.stringifyException(e));
           }
 
           if (parseResult == null) {
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, 
-                  new ParseStatus().getEmptyParse(conf));
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, new ParseStatus().getEmptyParse(conf));
             datum.setSignature(signature);
           }
         }
-        
-        /* Store status code in content So we can read this value during 
-         * parsing (as a separate job) and decide to parse or not.
+
+        /*
+         * Store status code in content so we can read this value during parsing
+         * (as a separate job) and decide to parse or not.
          */
-        content.getMetadata().add(Nutch.FETCH_STATUS_KEY, Integer.toString(status));
+        content.getMetadata().add(Nutch.FETCH_STATUS_KEY,
+            Integer.toString(status));
       }
 
       try {
@@ -363,7 +374,7 @@
             Text url = entry.getKey();
             Parse parse = entry.getValue();
             ParseStatus parseStatus = parse.getData().getStatus();
-            
+
             if (!parseStatus.isSuccess()) {
               LOG.warn("Error parsing: " + key + ": " + parseStatus);
               parse = parseStatus.getEmptyParse(getConf());
@@ -371,16 +382,16 @@
 
             // Calculate page signature. For non-parsing fetchers this will
             // be done in ParseSegment
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, parse);
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, parse);
             // Ensure segment name and score are in parseData metadata
-            parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, 
-                segmentName);
-            parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, 
-                StringUtil.toHexString(signature));
+            parse.getData().getContentMeta()
+                .set(Nutch.SEGMENT_NAME_KEY, segmentName);
+            parse.getData().getContentMeta()
+                .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
             // Pass fetch time to content meta
-            parse.getData().getContentMeta().set(Nutch.FETCH_TIME_KEY,
-                Long.toString(datum.getFetchTime()));
+            parse.getData().getContentMeta()
+                .set(Nutch.FETCH_TIME_KEY, Long.toString(datum.getFetchTime()));
             if (url.equals(key))
               datum.setSignature(signature);
             try {
@@ -391,15 +402,14 @@
                 LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
               }
             }
-            output.collect(url, new NutchWritable(
-                    new ParseImpl(new ParseText(parse.getText()), 
-                                  parse.getData(), parse.isCanonical())));
+            output.collect(url, new NutchWritable(new ParseImpl(new ParseText(
+                parse.getText()), parse.getData(), parse.isCanonical())));
           }
         }
       } catch (IOException e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       }
 
@@ -409,10 +419,10 @@
         if (p != null) {
           return p.getData().getStatus();
         }
-      } 
+      }
       return null;
     }
-    
+
   }
 
   private synchronized void updateStatus(int bytesInPage) throws IOException {
@@ -423,23 +433,22 @@
   private void reportStatus() throws IOException {
     String status;
     synchronized (this) {
-      long elapsed = (System.currentTimeMillis() - start)/1000;
-      status = 
-        pages+" pages, "+errors+" errors, "
-        + Math.round(((float)pages*10)/elapsed)/10.0+" pages/s, "
-        + Math.round(((((float)bytes)*8)/1024)/elapsed)+" kb/s, ";
+      long elapsed = (System.currentTimeMillis() - start) / 1000;
+      status = pages + " pages, " + errors + " errors, "
+          + Math.round(((float) pages * 10) / elapsed) / 10.0 + " pages/s, "
+          + Math.round(((((float) bytes) * 8) / 1024) / elapsed) + " kb/s, ";
     }
     reporter.setStatus(status);
   }
 
   public OldFetcher() {
-    
+
   }
-  
+
   public OldFetcher(Configuration conf) {
     setConf(conf);
   }
-  
+
   public void configure(JobConf job) {
     setConf(job);
 
@@ -447,12 +456,13 @@
     this.storingContent = isStoringContent(job);
     this.parsing = isParsing(job);
 
-//    if (job.getBoolean("fetcher.verbose", false)) {
-//      LOG.setLevel(Level.FINE);
-//    }
+    // if (job.getBoolean("fetcher.verbose", false)) {
+    // LOG.setLevel(Level.FINE);
+    // }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public static boolean isParsing(Configuration conf) {
     return conf.getBoolean("fetcher.parse", true);
@@ -462,29 +472,33 @@
     return conf.getBoolean("fetcher.store.content", true);
   }
 
-  public void run(RecordReader<WritableComparable, Writable> input, OutputCollector<Text, NutchWritable> output,
-                  Reporter reporter) throws IOException {
+  public void run(RecordReader<WritableComparable, Writable> input,
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
 
     this.input = input;
     this.output = output;
     this.reporter = reporter;
 
     this.maxRedirect = getConf().getInt("http.redirect.max", 3);
-    
+
     int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
-    if (LOG.isInfoEnabled()) { LOG.info("OldFetcher: threads: " + threadCount); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("OldFetcher: threads: " + threadCount);
+    }
 
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       new FetcherThread(getConf()).start();
     }
 
     // select a timeout that avoids a task timeout
-    long timeout = getConf().getInt("mapred.task.timeout", 10*60*1000)/2;
+    long timeout = getConf().getInt("mapred.task.timeout", 10 * 60 * 1000) / 2;
 
-    do {                                          // wait for threads to exit
+    do { // wait for threads to exit
       try {
         Thread.sleep(1000);
-      } catch (InterruptedException e) {}
+      } catch (InterruptedException e) {
+      }
 
       reportStatus();
 
@@ -492,18 +506,17 @@
       synchronized (this) {
         if ((System.currentTimeMillis() - lastRequestStart) > timeout) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Aborting with "+activeThreads+" hung threads.");
+            LOG.warn("Aborting with " + activeThreads + " hung threads.");
           }
           return;
         }
       }
 
     } while (activeThreads > 0);
-    
+
   }
 
-  public void fetch(Path segment, int threads)
-    throws IOException {
+  public void fetch(Path segment, int threads) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -521,7 +534,8 @@
     // for politeness, don't permit parallel execution of a single task
     job.setSpeculativeExecution(false);
 
-    FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+    FileInputFormat.addInputPath(job, new Path(segment,
+        CrawlDatum.GENERATE_DIR_NAME));
     job.setInputFormat(InputFormat.class);
 
     job.setMapRunnerClass(OldFetcher.class);
@@ -533,16 +547,17 @@
 
     JobClient.runJob(job);
     long end = System.currentTimeMillis();
-    LOG.info("OldFetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("OldFetcher: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-
   /** Run the fetcher. */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new OldFetcher(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new OldFetcher(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
 
     String usage = "Usage: OldFetcher <segment> [-threads n] [-noParsing]";
@@ -551,15 +566,16 @@
       System.err.println(usage);
       return -1;
     }
-      
+
     Path segment = new Path(args[0]);
     int threads = getConf().getInt("fetcher.threads.fetch", 10);
     boolean parsing = true;
 
-    for (int i = 1; i < args.length; i++) {       // parse command line
-      if (args[i].equals("-threads")) {           // found -threads option
-        threads =  Integer.parseInt(args[++i]);
-      } else if (args[i].equals("-noParsing")) parsing = false;
+    for (int i = 1; i < args.length; i++) { // parse command line
+      if (args[i].equals("-threads")) { // found -threads option
+        threads = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-noParsing"))
+        parsing = false;
     }
 
     getConf().setInt("fetcher.threads.fetch", threads);
@@ -567,7 +583,7 @@
       getConf().setBoolean("fetcher.parse", parsing);
     }
     try {
-      fetch(segment, threads);              // run the Fetcher
+      fetch(segment, threads); // run the Fetcher
       return 0;
     } catch (Exception e) {
       LOG.error("OldFetcher: " + StringUtils.stringifyException(e));
Index: src/java/org/apache/nutch/fetcher/Fetcher.java
===================================================================
--- src/java/org/apache/nutch/fetcher/Fetcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/Fetcher.java	(working copy)
@@ -51,44 +51,46 @@
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.*;
 
-
-/** 
+/**
  * A queue-based fetcher.
  * 
- * <p>This fetcher uses a well-known model of one producer (a QueueFeeder)
- * and many consumers (FetcherThread-s).
+ * <p>
+ * This fetcher uses a well-known model of one producer (a QueueFeeder) and many
+ * consumers (FetcherThread-s).
  * 
- * <p>QueueFeeder reads input fetchlists and
- * populates a set of FetchItemQueue-s, which hold FetchItem-s that
- * describe the items to be fetched. There are as many queues as there are unique
- * hosts, but at any given time the total number of fetch items in all queues
- * is less than a fixed number (currently set to a multiple of the number of
- * threads).
+ * <p>
+ * QueueFeeder reads input fetchlists and populates a set of FetchItemQueue-s,
+ * which hold FetchItem-s that describe the items to be fetched. There are as
+ * many queues as there are unique hosts, but at any given time the total number
+ * of fetch items in all queues is less than a fixed number (currently set to a
+ * multiple of the number of threads).
  * 
- * <p>As items are consumed from the queues, the QueueFeeder continues to add new
+ * <p>
+ * As items are consumed from the queues, the QueueFeeder continues to add new
  * input items, so that their total count stays fixed (FetcherThread-s may also
  * add new items to the queues e.g. as a results of redirection) - until all
  * input items are exhausted, at which point the number of items in the queues
  * begins to decrease. When this number reaches 0 fetcher will finish.
  * 
- * <p>This fetcher implementation handles per-host blocking itself, instead
- * of delegating this work to protocol-specific plugins.
- * Each per-host queue handles its own "politeness" settings, such as the
- * maximum number of concurrent requests and crawl delay between consecutive
- * requests - and also a list of requests in progress, and the time the last
- * request was finished. As FetcherThread-s ask for new items to be fetched,
- * queues may return eligible items or null if for "politeness" reasons this
- * host's queue is not yet ready.
+ * <p>
+ * This fetcher implementation handles per-host blocking itself, instead of
+ * delegating this work to protocol-specific plugins. Each per-host queue
+ * handles its own "politeness" settings, such as the maximum number of
+ * concurrent requests and crawl delay between consecutive requests - and also a
+ * list of requests in progress, and the time the last request was finished. As
+ * FetcherThread-s ask for new items to be fetched, queues may return eligible
+ * items or null if for "politeness" reasons this host's queue is not yet ready.
  * 
- * <p>If there are still unfetched items in the queues, but none of the items
- * are ready, FetcherThread-s will spin-wait until either some items become
+ * <p>
+ * If there are still unfetched items in the queues, but none of the items are
+ * ready, FetcherThread-s will spin-wait until either some items become
  * available, or a timeout is reached (at which point the Fetcher will abort,
  * assuming the task is hung).
  * 
  * @author Andrzej Bialecki
  */
 public class Fetcher extends Configured implements Tool,
-    MapRunnable<Text, CrawlDatum, Text, NutchWritable> { 
+    MapRunnable<Text, CrawlDatum, Text, NutchWritable> {
 
   public static final int PERM_REFRESH_TIME = 5;
 
@@ -97,17 +99,17 @@
   public static final String PROTOCOL_REDIR = "protocol";
 
   public static final Logger LOG = LoggerFactory.getLogger(Fetcher.class);
-  
-  public static class InputFormat extends SequenceFileInputFormat<Text, CrawlDatum> {
+
+  public static class InputFormat extends
+      SequenceFileInputFormat<Text, CrawlDatum> {
     /** Don't split inputs, to keep things polite. */
-    public InputSplit[] getSplits(JobConf job, int nSplits)
-      throws IOException {
+    public InputSplit[] getSplits(JobConf job, int nSplits) throws IOException {
       FileStatus[] files = listStatus(job);
       FileSplit[] splits = new FileSplit[files.length];
       for (int i = 0; i < files.length; i++) {
         FileStatus cur = files[i];
-        splits[i] = new FileSplit(cur.getPath(), 0,
-            cur.getLen(), (String[])null);
+        splits[i] = new FileSplit(cur.getPath(), 0, cur.getLen(),
+            (String[]) null);
       }
       return splits;
     }
@@ -115,7 +117,7 @@
 
   private OutputCollector<Text, NutchWritable> output;
   private Reporter reporter;
-  
+
   private String segmentName;
   private AtomicInteger activeThreads = new AtomicInteger(0);
   private AtomicInteger spinWaiting = new AtomicInteger(0);
@@ -123,36 +125,37 @@
   private long start = System.currentTimeMillis(); // start time of fetcher run
   private AtomicLong lastRequestStart = new AtomicLong(start);
 
-  private AtomicLong bytes = new AtomicLong(0);        // total bytes fetched
-  private AtomicInteger pages = new AtomicInteger(0);  // total pages fetched
+  private AtomicLong bytes = new AtomicLong(0); // total bytes fetched
+  private AtomicInteger pages = new AtomicInteger(0); // total pages fetched
   private AtomicInteger errors = new AtomicInteger(0); // total pages errored
 
   private boolean storingContent;
   private boolean parsing;
   FetchItemQueues fetchQueues;
   QueueFeeder feeder;
-  
+
   /**
    * This class described the item to be fetched.
    */
-  private static class FetchItem {    
+  private static class FetchItem {
     String queueID;
     Text url;
     URL u;
     CrawlDatum datum;
-    
+
     public FetchItem(Text url, URL u, CrawlDatum datum, String queueID) {
       this.url = url;
       this.u = u;
       this.datum = datum;
       this.queueID = queueID;
     }
-    
-    /** Create an item. Queue id will be created based on <code>queueMode</code>
-     * argument, either as a protocol + hostname pair, protocol + IP
-     * address pair or protocol+domain pair.
+
+    /**
+     * Create an item. Queue id will be created based on <code>queueMode</code>
+     * argument, either as a protocol + hostname pair, protocol + IP address
+     * pair or protocol+domain pair.
      */
-    public static FetchItem create(Text url, CrawlDatum datum,  String queueMode) {
+    public static FetchItem create(Text url, CrawlDatum datum, String queueMode) {
       String queueID;
       URL u = null;
       try {
@@ -172,19 +175,18 @@
           LOG.warn("Unable to resolve: " + u.getHost() + ", skipping.");
           return null;
         }
-      }
-      else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)){
+      } else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) {
         key = URLUtil.getDomainName(u);
         if (key == null) {
-          LOG.warn("Unknown domain for url: " + url + ", using URL string as key");
-          key=u.toExternalForm();
+          LOG.warn("Unknown domain for url: " + url
+              + ", using URL string as key");
+          key = u.toExternalForm();
         }
-      }
-      else {
+      } else {
         key = u.getHost();
         if (key == null) {
           LOG.warn("Unknown host for url: " + url + ", using URL string as key");
-          key=u.toExternalForm();
+          key = u.toExternalForm();
         }
       }
       queueID = proto + "://" + key.toLowerCase();
@@ -202,28 +204,31 @@
     public Text getUrl() {
       return url;
     }
-    
+
     public URL getURL2() {
       return u;
     }
   }
-  
+
   /**
-   * This class handles FetchItems which come from the same host ID (be it
-   * a proto/hostname or proto/IP pair). It also keeps track of requests in
+   * This class handles FetchItems which come from the same host ID (be it a
+   * proto/hostname or proto/IP pair). It also keeps track of requests in
    * progress and elapsed time between requests.
    */
   private static class FetchItemQueue {
-    List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());
-    Set<FetchItem>  inProgress = Collections.synchronizedSet(new HashSet<FetchItem>());
+    List<FetchItem> queue = Collections
+        .synchronizedList(new LinkedList<FetchItem>());
+    Set<FetchItem> inProgress = Collections
+        .synchronizedSet(new HashSet<FetchItem>());
     AtomicLong nextFetchTime = new AtomicLong();
     AtomicInteger exceptionCounter = new AtomicInteger();
     long crawlDelay;
     long minCrawlDelay;
     int maxThreads;
     Configuration conf;
-    
-    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay, long minCrawlDelay) {
+
+    public FetchItemQueue(Configuration conf, int maxThreads, long crawlDelay,
+        long minCrawlDelay) {
       this.conf = conf;
       this.maxThreads = maxThreads;
       this.crawlDelay = crawlDelay;
@@ -231,57 +236,64 @@
       // ready to start
       setEndTime(System.currentTimeMillis() - crawlDelay);
     }
-    
+
     public synchronized int emptyQueue() {
       int presize = queue.size();
       queue.clear();
       return presize;
     }
-    
+
     public int getQueueSize() {
       return queue.size();
     }
-    
+
     public int getInProgressSize() {
       return inProgress.size();
     }
-    
+
     public int incrementExceptionCounter() {
       return exceptionCounter.incrementAndGet();
     }
-    
+
     public void finishFetchItem(FetchItem it, boolean asap) {
       if (it != null) {
         inProgress.remove(it);
         setEndTime(System.currentTimeMillis(), asap);
       }
     }
-    
+
     public void addFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       queue.add(it);
     }
-    
+
     public void addInProgressFetchItem(FetchItem it) {
-      if (it == null) return;
+      if (it == null)
+        return;
       inProgress.add(it);
     }
-    
+
     public FetchItem getFetchItem() {
-      if (inProgress.size() >= maxThreads) return null;
+      if (inProgress.size() >= maxThreads)
+        return null;
       long now = System.currentTimeMillis();
-      if (nextFetchTime.get() > now) return null;
+      if (nextFetchTime.get() > now)
+        return null;
       FetchItem it = null;
-      if (queue.size() == 0) return null;
+      if (queue.size() == 0)
+        return null;
       try {
         it = queue.remove(0);
         inProgress.add(it);
       } catch (Exception e) {
-        LOG.error("Cannot remove FetchItem from queue or cannot add it to inProgress queue", e);
+        LOG.error(
+            "Cannot remove FetchItem from queue or cannot add it to inProgress queue",
+            e);
       }
       return it;
     }
-    
+
     public synchronized void dump() {
       LOG.info("  maxThreads    = " + maxThreads);
       LOG.info("  inProgress    = " + inProgress.size());
@@ -294,19 +306,20 @@
         LOG.info("  " + i + ". " + it.url);
       }
     }
-    
+
     private void setEndTime(long endTime) {
       setEndTime(endTime, false);
     }
-    
+
     private void setEndTime(long endTime, boolean asap) {
       if (!asap)
-        nextFetchTime.set(endTime + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
+        nextFetchTime.set(endTime
+            + (maxThreads > 1 ? minCrawlDelay : crawlDelay));
       else
         nextFetchTime.set(endTime);
     }
   }
-  
+
   /**
    * Convenience class - a collection of queues that keeps track of the total
    * number of items, and provides items eligible for fetching from any queue.
@@ -320,55 +333,60 @@
     long minCrawlDelay;
     long timelimit = -1;
     int maxExceptionsPerQueue = -1;
-    Configuration conf;  
+    Configuration conf;
 
     public static final String QUEUE_MODE_HOST = "byHost";
     public static final String QUEUE_MODE_DOMAIN = "byDomain";
     public static final String QUEUE_MODE_IP = "byIP";
-    
+
     String queueMode;
-    
+
     public FetchItemQueues(Configuration conf) {
       this.conf = conf;
       this.maxThreads = conf.getInt("fetcher.threads.per.queue", 1);
       queueMode = conf.get("fetcher.queue.mode", QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(QUEUE_MODE_IP) && !queueMode.equals(QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(QUEUE_MODE_IP)
+          && !queueMode.equals(QUEUE_MODE_DOMAIN)
           && !queueMode.equals(QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
-      
+      LOG.info("Using queue mode : " + queueMode);
+
       this.crawlDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
-      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay", 0.0f) * 1000);
+      this.minCrawlDelay = (long) (conf.getFloat("fetcher.server.min.delay",
+          0.0f) * 1000);
       this.timelimit = conf.getLong("fetcher.timelimit", -1);
-      this.maxExceptionsPerQueue = conf.getInt("fetcher.max.exceptions.per.queue", -1);
+      this.maxExceptionsPerQueue = conf.getInt(
+          "fetcher.max.exceptions.per.queue", -1);
     }
-    
+
     public int getTotalSize() {
       return totalSize.get();
     }
-    
+
     public int getQueueCount() {
       return queues.size();
     }
-    
+
     public void addFetchItem(Text url, CrawlDatum datum) {
       FetchItem it = FetchItem.create(url, datum, queueMode);
-      if (it != null) addFetchItem(it);
+      if (it != null)
+        addFetchItem(it);
     }
-    
+
     public synchronized void addFetchItem(FetchItem it) {
       FetchItemQueue fiq = getFetchItemQueue(it.queueID);
       fiq.addFetchItem(it);
       totalSize.incrementAndGet();
     }
-    
+
     public void finishFetchItem(FetchItem it) {
       finishFetchItem(it, false);
     }
-    
+
     public void finishFetchItem(FetchItem it, boolean asap) {
       FetchItemQueue fiq = queues.get(it.queueID);
       if (fiq == null) {
@@ -377,7 +395,7 @@
       }
       fiq.finishFetchItem(it, asap);
     }
-    
+
     public synchronized FetchItemQueue getFetchItemQueue(String id) {
       FetchItemQueue fiq = queues.get(id);
       if (fiq == null) {
@@ -387,10 +405,10 @@
       }
       return fiq;
     }
-    
+
     public synchronized FetchItem getFetchItem() {
-      Iterator<Map.Entry<String, FetchItemQueue>> it =
-        queues.entrySet().iterator();
+      Iterator<Map.Entry<String, FetchItemQueue>> it = queues.entrySet()
+          .iterator();
       while (it.hasNext()) {
         FetchItemQueue fiq = it.next().getValue();
         // reap empty queues
@@ -406,7 +424,7 @@
       }
       return null;
     }
-    
+
     // called only once the feeder has stopped
     public synchronized int checkTimelimit() {
       int count = 0;
@@ -418,7 +436,8 @@
         // there might also be a case where totalsize !=0 but number of queues
         // == 0
         // in which case we simply force it to 0 to avoid blocking
-        if (totalSize.get() != 0 && queues.size() == 0) totalSize.set(0);
+        if (totalSize.get() != 0 && queues.size() == 0)
+          totalSize.set(0);
       }
       return count;
     }
@@ -429,7 +448,8 @@
 
       for (String id : queues.keySet()) {
         FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id + " >> dropping! ");
         int deleted = fiq.emptyQueue();
         for (int i = 0; i < deleted; i++) {
@@ -440,11 +460,11 @@
 
       return count;
     }
-    
+
     /**
      * Increment the exception counter of a queue in case of an exception e.g.
      * timeout; when higher than a given threshold simply empty the queue.
-     *
+     * 
      * @param queueid
      * @return number of purged items
      */
@@ -457,7 +477,7 @@
         return 0;
       }
       int excCount = fiq.incrementExceptionCounter();
-      if (maxExceptionsPerQueue!= -1 && excCount >= maxExceptionsPerQueue) {
+      if (maxExceptionsPerQueue != -1 && excCount >= maxExceptionsPerQueue) {
         // too many exceptions for items in this queue - purge it
         int deleted = fiq.emptyQueue();
         LOG.info("* queue: " + queueid + " >> removed " + deleted
@@ -470,20 +490,20 @@
       return 0;
     }
 
-    
     public synchronized void dump() {
       for (String id : queues.keySet()) {
         FetchItemQueue fiq = queues.get(id);
-        if (fiq.getQueueSize() == 0) continue;
+        if (fiq.getQueueSize() == 0)
+          continue;
         LOG.info("* queue: " + id);
         fiq.dump();
       }
     }
   }
-  
+
   /**
-   * This class feeds the queues with input items, and re-fills them as
-   * items are consumed by FetcherThread-s.
+   * This class feeds the queues with input items, and re-fills them as items
+   * are consumed by FetcherThread-s.
    */
   private static class QueueFeeder extends Thread {
     private RecordReader<Text, CrawlDatum> reader;
@@ -499,7 +519,7 @@
       this.setDaemon(true);
       this.setName("QueueFeeder");
     }
-    
+
     public void setTimeLimit(long tl) {
       timelimit = tl;
     }
@@ -528,7 +548,9 @@
           // queues are full - spin-wait until they have some free space
           try {
             Thread.sleep(1000);
-          } catch (Exception e) {};
+          } catch (Exception e) {
+          }
+          ;
           continue;
         } else {
           LOG.debug("-feeding " + feed + " input urls ...");
@@ -549,11 +571,11 @@
           }
         }
       }
-      LOG.info("QueueFeeder finished: total " + cnt + " records + hit by time limit :"
-          + timelimitcount);
+      LOG.info("QueueFeeder finished: total " + cnt
+          + " records + hit by time limit :" + timelimitcount);
     }
   }
-  
+
   /**
    * This class picks items from queues and fetches the pages.
    */
@@ -573,8 +595,8 @@
     private boolean ignoreExternalLinks;
 
     public FetcherThread(Configuration conf) {
-      this.setDaemon(true);                       // don't hang JVM on exit
-      this.setName("FetcherThread");              // use an informative name
+      this.setDaemon(true); // don't hang JVM on exit
+      this.setName("FetcherThread"); // use an informative name
       this.conf = conf;
       this.urlFilters = new URLFilters(conf);
       this.scfilters = new ScoringFilters(conf);
@@ -582,25 +604,28 @@
       this.protocolFactory = new ProtocolFactory(conf);
       this.normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_FETCHER);
       this.maxCrawlDelay = conf.getInt("fetcher.max.crawl.delay", 30) * 1000;
-      queueMode = conf.get("fetcher.queue.mode", FetchItemQueues.QUEUE_MODE_HOST);
+      queueMode = conf.get("fetcher.queue.mode",
+          FetchItemQueues.QUEUE_MODE_HOST);
       // check that the mode is known
-      if (!queueMode.equals(FetchItemQueues.QUEUE_MODE_IP) && !queueMode.equals(FetchItemQueues.QUEUE_MODE_DOMAIN)
+      if (!queueMode.equals(FetchItemQueues.QUEUE_MODE_IP)
+          && !queueMode.equals(FetchItemQueues.QUEUE_MODE_DOMAIN)
           && !queueMode.equals(FetchItemQueues.QUEUE_MODE_HOST)) {
-        LOG.error("Unknown partition mode : " + queueMode + " - forcing to byHost");
+        LOG.error("Unknown partition mode : " + queueMode
+            + " - forcing to byHost");
         queueMode = FetchItemQueues.QUEUE_MODE_HOST;
       }
-      LOG.info("Using queue mode : "+queueMode);
+      LOG.info("Using queue mode : " + queueMode);
       this.maxRedirect = conf.getInt("http.redirect.max", 3);
-      this.ignoreExternalLinks = 
-        conf.getBoolean("db.ignore.external.links", false);
+      this.ignoreExternalLinks = conf.getBoolean("db.ignore.external.links",
+          false);
     }
 
     public void run() {
       activeThreads.incrementAndGet(); // count threads
-      
+
       FetchItem fit = null;
       try {
-        
+
         while (true) {
           fit = fetchQueues.getFetchItem();
           if (fit == null) {
@@ -610,8 +635,9 @@
               spinWaiting.incrementAndGet();
               try {
                 Thread.sleep(500);
-              } catch (Exception e) {}
-                spinWaiting.decrementAndGet();
+              } catch (Exception e) {
+              }
+              spinWaiting.decrementAndGet();
               continue;
             } else {
               // all done, finish this thread
@@ -619,15 +645,17 @@
             }
           }
           lastRequestStart.set(System.currentTimeMillis());
-          Text reprUrlWritable =
-            (Text) fit.datum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
+          Text reprUrlWritable = (Text) fit.datum.getMetaData().get(
+              Nutch.WRITABLE_REPR_URL_KEY);
           if (reprUrlWritable == null) {
             reprUrl = fit.url.toString();
           } else {
             reprUrl = reprUrlWritable.toString();
           }
           try {
-            if (LOG.isInfoEnabled()) { LOG.info("fetching " + fit.url); }
+            if (LOG.isInfoEnabled()) {
+              LOG.info("fetching " + fit.url);
+            }
 
             // fetch the page
             redirecting = false;
@@ -637,7 +665,8 @@
                 LOG.debug("redirectCount=" + redirectCount);
               }
               redirecting = false;
-              Protocol protocol = this.protocolFactory.getProtocol(fit.url.toString());
+              Protocol protocol = this.protocolFactory.getProtocol(fit.url
+                  .toString());
               RobotRules rules = protocol.getRobotRules(fit.url, fit.datum);
               if (!rules.isAllowed(fit.u)) {
                 // unblock
@@ -645,7 +674,9 @@
                 if (LOG.isDebugEnabled()) {
                   LOG.debug("Denied by robots.txt: " + fit.url);
                 }
-                output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
+                output(fit.url, fit.datum, null,
+                    ProtocolStatus.STATUS_ROBOTS_DENIED,
+                    CrawlDatum.STATUS_FETCH_GONE);
                 reporter.incrCounter("FetcherStatus", "robots_denied", 1);
                 continue;
               }
@@ -653,16 +684,22 @@
                 if (rules.getCrawlDelay() > maxCrawlDelay) {
                   // unblock
                   fetchQueues.finishFetchItem(fit, true);
-                  LOG.debug("Crawl-Delay for " + fit.url + " too long (" + rules.getCrawlDelay() + "), skipping");
-                  output(fit.url, fit.datum, null, ProtocolStatus.STATUS_ROBOTS_DENIED, CrawlDatum.STATUS_FETCH_GONE);
-                  reporter.incrCounter("FetcherStatus", "robots_denied_maxcrawldelay", 1);
+                  LOG.debug("Crawl-Delay for " + fit.url + " too long ("
+                      + rules.getCrawlDelay() + "), skipping");
+                  output(fit.url, fit.datum, null,
+                      ProtocolStatus.STATUS_ROBOTS_DENIED,
+                      CrawlDatum.STATUS_FETCH_GONE);
+                  reporter.incrCounter("FetcherStatus",
+                      "robots_denied_maxcrawldelay", 1);
                   continue;
                 } else {
-                  FetchItemQueue fiq = fetchQueues.getFetchItemQueue(fit.queueID);
+                  FetchItemQueue fiq = fetchQueues
+                      .getFetchItemQueue(fit.queueID);
                   fiq.crawlDelay = rules.getCrawlDelay();
                 }
               }
-              ProtocolOutput output = protocol.getProtocolOutput(fit.url, fit.datum);
+              ProtocolOutput output = protocol.getProtocolOutput(fit.url,
+                  fit.datum);
               ProtocolStatus status = output.getStatus();
               Content content = output.getContent();
               ParseStatus pstatus = null;
@@ -672,28 +709,28 @@
               String urlString = fit.url.toString();
 
               reporter.incrCounter("FetcherStatus", status.getName(), 1);
-              
-              switch(status.getCode()) {
-                
+
+              switch (status.getCode()) {
+
               case ProtocolStatus.WOULDBLOCK:
                 // retry ?
                 fetchQueues.addFetchItem(fit);
                 break;
 
-              case ProtocolStatus.SUCCESS:        // got a page
-                pstatus = output(fit.url, fit.datum, content, status, CrawlDatum.STATUS_FETCH_SUCCESS);
+              case ProtocolStatus.SUCCESS: // got a page
+                pstatus = output(fit.url, fit.datum, content, status,
+                    CrawlDatum.STATUS_FETCH_SUCCESS);
                 updateStatus(content.getContent().length);
-                if (pstatus != null && pstatus.isSuccess() &&
-                        pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+                if (pstatus != null && pstatus.isSuccess()
+                    && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
                   String newUrl = pstatus.getMessage();
                   int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
-                  Text redirUrl =
-                    handleRedirect(fit.url, fit.datum,
-                                   urlString, newUrl,
-                                   refreshTime < Fetcher.PERM_REFRESH_TIME,
-                                   Fetcher.CONTENT_REDIR);
+                  Text redirUrl = handleRedirect(fit.url, fit.datum, urlString,
+                      newUrl, refreshTime < Fetcher.PERM_REFRESH_TIME,
+                      Fetcher.CONTENT_REDIR);
                   if (redirUrl != null) {
-                    CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
+                    CrawlDatum newDatum = new CrawlDatum(
+                        CrawlDatum.STATUS_DB_UNFETCHED,
                         fit.datum.getFetchInterval(), fit.datum.getScore());
                     // transfer existing metadata to the redir
                     newDatum.getMetaData().putAll(fit.datum.getMetaData());
@@ -704,19 +741,20 @@
                     }
                     fit = FetchItem.create(redirUrl, newDatum, queueMode);
                     if (fit != null) {
-                      FetchItemQueue fiq =
-                        fetchQueues.getFetchItemQueue(fit.queueID);
+                      FetchItemQueue fiq = fetchQueues
+                          .getFetchItemQueue(fit.queueID);
                       fiq.addInProgressFetchItem(fit);
                     } else {
                       // stop redirecting
                       redirecting = false;
-                      reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect", 1);
+                      reporter.incrCounter("FetcherStatus",
+                          "FetchItem.notCreated.redirect", 1);
                     }
                   }
                 }
                 break;
 
-              case ProtocolStatus.MOVED:         // redirect
+              case ProtocolStatus.MOVED: // redirect
               case ProtocolStatus.TEMP_MOVED:
                 int code;
                 boolean temp;
@@ -729,12 +767,11 @@
                 }
                 output(fit.url, fit.datum, content, status, code);
                 String newUrl = status.getMessage();
-                Text redirUrl =
-                  handleRedirect(fit.url, fit.datum,
-                                 urlString, newUrl, temp,
-                                 Fetcher.PROTOCOL_REDIR);
+                Text redirUrl = handleRedirect(fit.url, fit.datum, urlString,
+                    newUrl, temp, Fetcher.PROTOCOL_REDIR);
                 if (redirUrl != null) {
-                  CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
+                  CrawlDatum newDatum = new CrawlDatum(
+                      CrawlDatum.STATUS_DB_UNFETCHED,
                       fit.datum.getFetchInterval(), fit.datum.getScore());
                   // transfer existing metadata
                   newDatum.getMetaData().putAll(fit.datum.getMetaData());
@@ -745,13 +782,14 @@
                   }
                   fit = FetchItem.create(redirUrl, newDatum, queueMode);
                   if (fit != null) {
-                    FetchItemQueue fiq =
-                      fetchQueues.getFetchItemQueue(fit.queueID);
+                    FetchItemQueue fiq = fetchQueues
+                        .getFetchItemQueue(fit.queueID);
                     fiq.addInProgressFetchItem(fit);
                   } else {
                     // stop redirecting
                     redirecting = false;
-                    reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect", 1);
+                    reporter.incrCounter("FetcherStatus",
+                        "FetchItem.notCreated.redirect", 1);
                   }
                 } else {
                   // stop redirecting
@@ -761,31 +799,37 @@
 
               case ProtocolStatus.EXCEPTION:
                 logError(fit.url, status.getMessage());
-                int killedURLs = fetchQueues.checkExceptionThreshold(fit.getQueueID());
-                if (killedURLs!=0)
-                   reporter.incrCounter("FetcherStatus", "AboveExceptionThresholdInQueue", killedURLs);
+                int killedURLs = fetchQueues.checkExceptionThreshold(fit
+                    .getQueueID());
+                if (killedURLs != 0)
+                  reporter.incrCounter("FetcherStatus",
+                      "AboveExceptionThresholdInQueue", killedURLs);
                 /* FALLTHROUGH */
-              case ProtocolStatus.RETRY:          // retry
+              case ProtocolStatus.RETRY: // retry
               case ProtocolStatus.BLOCKED:
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_RETRY);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_RETRY);
                 break;
-                
-              case ProtocolStatus.GONE:           // gone
+
+              case ProtocolStatus.GONE: // gone
               case ProtocolStatus.NOTFOUND:
               case ProtocolStatus.ACCESS_DENIED:
               case ProtocolStatus.ROBOTS_DENIED:
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_GONE);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_GONE);
                 break;
 
               case ProtocolStatus.NOTMODIFIED:
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_NOTMODIFIED);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_NOTMODIFIED);
                 break;
 
               default:
                 if (LOG.isWarnEnabled()) {
                   LOG.warn("Unknown ProtocolStatus: " + status.getCode());
                 }
-                output(fit.url, fit.datum, null, status, CrawlDatum.STATUS_FETCH_RETRY);
+                output(fit.url, fit.datum, null, status,
+                    CrawlDatum.STATUS_FETCH_RETRY);
               }
 
               if (redirecting && redirectCount > maxRedirect) {
@@ -793,53 +837,58 @@
                 if (LOG.isInfoEnabled()) {
                   LOG.info(" - redirect count exceeded " + fit.url);
                 }
-                output(fit.url, fit.datum, null, ProtocolStatus.STATUS_REDIR_EXCEEDED, CrawlDatum.STATUS_FETCH_GONE);
+                output(fit.url, fit.datum, null,
+                    ProtocolStatus.STATUS_REDIR_EXCEEDED,
+                    CrawlDatum.STATUS_FETCH_GONE);
               }
 
             } while (redirecting && (redirectCount <= maxRedirect));
-            
-          } catch (Throwable t) {                 // unexpected exception
+
+          } catch (Throwable t) { // unexpected exception
             // unblock
             fetchQueues.finishFetchItem(fit);
             logError(fit.url, t.toString());
-            output(fit.url, fit.datum, null, ProtocolStatus.STATUS_FAILED, CrawlDatum.STATUS_FETCH_RETRY);
+            output(fit.url, fit.datum, null, ProtocolStatus.STATUS_FAILED,
+                CrawlDatum.STATUS_FETCH_RETRY);
           }
         }
 
       } catch (Throwable e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       } finally {
-        if (fit != null) fetchQueues.finishFetchItem(fit);
+        if (fit != null)
+          fetchQueues.finishFetchItem(fit);
         activeThreads.decrementAndGet(); // count threads
-        LOG.info("-finishing thread " + getName() + ", activeThreads=" + activeThreads);
+        LOG.info("-finishing thread " + getName() + ", activeThreads="
+            + activeThreads);
       }
     }
 
-    private Text handleRedirect(Text url, CrawlDatum datum,
-                                String urlString, String newUrl,
-                                boolean temp, String redirType)
-    throws MalformedURLException, URLFilterException {
+    private Text handleRedirect(Text url, CrawlDatum datum, String urlString,
+        String newUrl, boolean temp, String redirType)
+        throws MalformedURLException, URLFilterException {
       newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
       newUrl = urlFilters.filter(newUrl);
-      
+
       if (ignoreExternalLinks) {
         try {
           String origHost = new URL(urlString).getHost().toLowerCase();
           String newHost = new URL(newUrl).getHost().toLowerCase();
           if (!origHost.equals(newHost)) {
             if (LOG.isDebugEnabled()) {
-              LOG.debug(" - ignoring redirect " + redirType + " from " +
-                          urlString + " to " + newUrl +
-                          " because external links are ignored");
+              LOG.debug(" - ignoring redirect " + redirType + " from "
+                  + urlString + " to " + newUrl
+                  + " because external links are ignored");
             }
             return null;
           }
-        } catch (MalformedURLException e) { }
+        } catch (MalformedURLException e) {
+        }
       }
-      
+
       if (newUrl != null && !newUrl.equals(urlString)) {
         reprUrl = URLUtil.chooseRepr(reprUrl, newUrl, temp);
         url = new Text(newUrl);
@@ -847,14 +896,14 @@
           redirecting = true;
           redirectCount++;
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                url + " (fetching now)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching now)");
           }
           return url;
         } else {
           CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_LINKED,
-              datum.getFetchInterval(),datum.getScore());
-          // transfer existing metadata 
+              datum.getFetchInterval(), datum.getScore());
+          // transfer existing metadata
           newDatum.getMetaData().putAll(datum.getMetaData());
           try {
             scfilters.initialScore(url, newDatum);
@@ -867,15 +916,15 @@
           }
           output(url, newDatum, null, null, CrawlDatum.STATUS_LINKED);
           if (LOG.isDebugEnabled()) {
-            LOG.debug(" - " + redirType + " redirect to " +
-                url + " (fetching later)");
+            LOG.debug(" - " + redirType + " redirect to " + url
+                + " (fetching later)");
           }
           return null;
         }
       } else {
         if (LOG.isDebugEnabled()) {
-          LOG.debug(" - " + redirType + " redirect skipped: " +
-              (newUrl != null ? "to same url" : "filtered"));
+          LOG.debug(" - " + redirType + " redirect skipped: "
+              + (newUrl != null ? "to same url" : "filtered"));
         }
         return null;
       }
@@ -888,12 +937,13 @@
       errors.incrementAndGet();
     }
 
-    private ParseStatus output(Text key, CrawlDatum datum,
-                        Content content, ProtocolStatus pstatus, int status) {
+    private ParseStatus output(Text key, CrawlDatum datum, Content content,
+        ProtocolStatus pstatus, int status) {
 
       datum.setStatus(status);
       datum.setFetchTime(System.currentTimeMillis());
-      if (pstatus != null) datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
+      if (pstatus != null)
+        datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
 
       ParseResult parseResult = null;
       if (content != null) {
@@ -909,27 +959,31 @@
             LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
           }
         }
-        /* Note: Fetcher will only follow meta-redirects coming from the
-         * original URL. */ 
+        /*
+         * Note: Fetcher will only follow meta-redirects coming from the
+         * original URL.
+         */
         if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
           try {
             parseResult = this.parseUtil.parse(content);
           } catch (Exception e) {
-            LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException(e));
+            LOG.warn("Error parsing: " + key + ": "
+                + StringUtils.stringifyException(e));
           }
 
           if (parseResult == null) {
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, 
-                  new ParseStatus().getEmptyParse(conf));
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, new ParseStatus().getEmptyParse(conf));
             datum.setSignature(signature);
           }
         }
-        
-        /* Store status code in content So we can read this value during 
-         * parsing (as a separate job) and decide to parse or not.
+
+        /*
+         * Store status code in content so we can read this value during parsing
+         * (as a separate job) and decide to parse or not.
          */
-        content.getMetadata().add(Nutch.FETCH_STATUS_KEY, Integer.toString(status));
+        content.getMetadata().add(Nutch.FETCH_STATUS_KEY,
+            Integer.toString(status));
       }
 
       try {
@@ -941,7 +995,7 @@
             Text url = entry.getKey();
             Parse parse = entry.getValue();
             ParseStatus parseStatus = parse.getData().getStatus();
-            
+
             if (!parseStatus.isSuccess()) {
               LOG.warn("Error parsing: " + key + ": " + parseStatus);
               parse = parseStatus.getEmptyParse(getConf());
@@ -949,16 +1003,16 @@
 
             // Calculate page signature. For non-parsing fetchers this will
             // be done in ParseSegment
-            byte[] signature = 
-              SignatureFactory.getSignature(getConf()).calculate(content, parse);
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, parse);
             // Ensure segment name and score are in parseData metadata
-            parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, 
-                segmentName);
-            parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, 
-                StringUtil.toHexString(signature));
+            parse.getData().getContentMeta()
+                .set(Nutch.SEGMENT_NAME_KEY, segmentName);
+            parse.getData().getContentMeta()
+                .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
             // Pass fetch time to content meta
-            parse.getData().getContentMeta().set(Nutch.FETCH_TIME_KEY,
-                Long.toString(datum.getFetchTime()));
+            parse.getData().getContentMeta()
+                .set(Nutch.FETCH_TIME_KEY, Long.toString(datum.getFetchTime()));
             if (url.equals(key))
               datum.setSignature(signature);
             try {
@@ -969,15 +1023,14 @@
                 LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
               }
             }
-            output.collect(url, new NutchWritable(
-                    new ParseImpl(new ParseText(parse.getText()), 
-                                  parse.getData(), parse.isCanonical())));
+            output.collect(url, new NutchWritable(new ParseImpl(new ParseText(
+                parse.getText()), parse.getData(), parse.isCanonical())));
           }
         }
       } catch (IOException e) {
         if (LOG.isErrorEnabled()) {
           e.printStackTrace(LogUtil.getErrorStream(LOG));
-          LOG.error("fetcher caught:"+e.toString());
+          LOG.error("fetcher caught:" + e.toString());
         }
       }
 
@@ -985,38 +1038,42 @@
       if (parseResult != null && !parseResult.isEmpty()) {
         Parse p = parseResult.get(content.getUrl());
         if (p != null) {
-          reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[p.getData().getStatus().getMajorCode()], 1);
+          reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[p
+              .getData().getStatus().getMajorCode()], 1);
           return p.getData().getStatus();
         }
       }
       return null;
     }
-    
+
   }
 
-  public Fetcher() { super(null); }
+  public Fetcher() {
+    super(null);
+  }
 
-  public Fetcher(Configuration conf) { super(conf); }
+  public Fetcher(Configuration conf) {
+    super(conf);
+  }
 
   private void updateStatus(int bytesInPage) throws IOException {
     pages.incrementAndGet();
     bytes.addAndGet(bytesInPage);
   }
 
-  
-  private void reportStatus(int pagesLastSec, int bytesLastSec) throws IOException {
+  private void reportStatus(int pagesLastSec, int bytesLastSec)
+      throws IOException {
     String status;
-    long elapsed = (System.currentTimeMillis() - start)/1000;
+    long elapsed = (System.currentTimeMillis() - start) / 1000;
 
-    float avgPagesSec = Math.round(((float)pages.get()*10)/elapsed)/10;
-    float avgBytesSec = Math.round(((((float)bytes.get())*8)/1000)/elapsed);
+    float avgPagesSec = Math.round(((float) pages.get() * 10) / elapsed) / 10;
+    float avgBytesSec = Math.round(((((float) bytes.get()) * 8) / 1000)
+        / elapsed);
 
-    status = activeThreads + " threads, " +
-     fetchQueues.getQueueCount() + " queues, "+
-     fetchQueues.getTotalSize() + " URLs queued, "+
-      pages+" pages, "+errors+" errors, "
-      + avgPagesSec + " (" + pagesLastSec + ") pages/s, "
-      + avgBytesSec + " (" + bytesLastSec + ") kbits/s, ";
+    status = activeThreads + " threads, " + fetchQueues.getQueueCount()
+        + " queues, " + fetchQueues.getTotalSize() + " URLs queued, " + pages
+        + " pages, " + errors + " errors, " + avgPagesSec + " (" + pagesLastSec
+        + ") pages/s, " + avgBytesSec + " (" + bytesLastSec + ") kbits/s, ";
 
     reporter.setStatus(status);
   }
@@ -1028,12 +1085,13 @@
     this.storingContent = isStoringContent(job);
     this.parsing = isParsing(job);
 
-//    if (job.getBoolean("fetcher.verbose", false)) {
-//      LOG.setLevel(Level.FINE);
-//    }
+    // if (job.getBoolean("fetcher.verbose", false)) {
+    // LOG.setLevel(Level.FINE);
+    // }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public static boolean isParsing(Configuration conf) {
     return conf.getBoolean("fetcher.parse", true);
@@ -1044,41 +1102,51 @@
   }
 
   public void run(RecordReader<Text, CrawlDatum> input,
-      OutputCollector<Text, NutchWritable> output,
-                  Reporter reporter) throws IOException {
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
 
     this.output = output;
     this.reporter = reporter;
     this.fetchQueues = new FetchItemQueues(getConf());
 
     int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: threads: " + threadCount); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: threads: " + threadCount);
+    }
 
     int timeoutDivisor = getConf().getInt("fetcher.threads.timeout.divisor", 2);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: time-out divisor: " + timeoutDivisor); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: time-out divisor: " + timeoutDivisor);
+    }
 
-    int queueDepthMuliplier =  getConf().getInt("fetcher.queue.depth.multiplier", 50);
-    
-    feeder = new QueueFeeder(input, fetchQueues, threadCount * queueDepthMuliplier);
-    //feeder.setPriority((Thread.MAX_PRIORITY + Thread.NORM_PRIORITY) / 2);
-    
-    // the value of the time limit is either -1 or the time where it should finish
+    int queueDepthMuliplier = getConf().getInt(
+        "fetcher.queue.depth.multiplier", 50);
+
+    feeder = new QueueFeeder(input, fetchQueues, threadCount
+        * queueDepthMuliplier);
+    // feeder.setPriority((Thread.MAX_PRIORITY + Thread.NORM_PRIORITY) / 2);
+
+    // the value of the time limit is either -1 or the time where it should
+    // finish
     long timelimit = getConf().getLong("fetcher.timelimit", -1);
-    if (timelimit != -1) feeder.setTimeLimit(timelimit);
+    if (timelimit != -1)
+      feeder.setTimeLimit(timelimit);
     feeder.start();
 
     // set non-blocking & no-robots mode for HTTP protocol plugins.
     getConf().setBoolean(Protocol.CHECK_BLOCKING, false);
     getConf().setBoolean(Protocol.CHECK_ROBOTS, false);
-    
-    for (int i = 0; i < threadCount; i++) {       // spawn threads
+
+    for (int i = 0; i < threadCount; i++) { // spawn threads
       new FetcherThread(getConf()).start();
     }
 
     // select a timeout that avoids a task timeout
-    long timeout = getConf().getInt("mapred.task.timeout", 10*60*1000)/timeoutDivisor;
+    long timeout = getConf().getInt("mapred.task.timeout", 10 * 60 * 1000)
+        / timeoutDivisor;
 
-    // Used for threshold check, holds pages and bytes processed in the last second
+    // Used for threshold check, holds pages and bytes processed in the last
+    // second
     int pagesLastSec;
     int bytesLastSec;
 
@@ -1086,26 +1154,35 @@
     boolean throughputThresholdExceeded = false;
     int throughputThresholdNumRetries = 0;
 
-    int throughputThresholdPages = getConf().getInt("fetcher.throughput.threshold.pages", -1);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages); }
-    int throughputThresholdMaxRetries = getConf().getInt("fetcher.throughput.threshold.retries", 5);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: throughput threshold retries: " + throughputThresholdMaxRetries); }
+    int throughputThresholdPages = getConf().getInt(
+        "fetcher.throughput.threshold.pages", -1);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold: " + throughputThresholdPages);
+    }
+    int throughputThresholdMaxRetries = getConf().getInt(
+        "fetcher.throughput.threshold.retries", 5);
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Fetcher: throughput threshold retries: "
+          + throughputThresholdMaxRetries);
+    }
 
-    do {                                          // wait for threads to exit
+    do { // wait for threads to exit
       pagesLastSec = pages.get();
-      bytesLastSec = (int)bytes.get();
+      bytesLastSec = (int) bytes.get();
 
       try {
         Thread.sleep(1000);
-      } catch (InterruptedException e) {}
+      } catch (InterruptedException e) {
+      }
 
       pagesLastSec = pages.get() - pagesLastSec;
-      bytesLastSec = (int)bytes.get() - bytesLastSec;
+      bytesLastSec = (int) bytes.get() - bytesLastSec;
 
       reportStatus(pagesLastSec, bytesLastSec);
 
-      LOG.info("-activeThreads=" + activeThreads + ", spinWaiting=" + spinWaiting.get()
-          + ", fetchQueues.totalSize=" + fetchQueues.getTotalSize());
+      LOG.info("-activeThreads=" + activeThreads + ", spinWaiting="
+          + spinWaiting.get() + ", fetchQueues.totalSize="
+          + fetchQueues.getTotalSize());
 
       if (!feeder.isAlive() && fetchQueues.getTotalSize() < 5) {
         fetchQueues.dump();
@@ -1113,16 +1190,23 @@
 
       // if throughput threshold is enabled
       if (!feeder.isAlive() && throughputThresholdPages != -1) {
-        // Have we reached the threshold of pages/second and threshold was not yet exceeded
-        if (pagesLastSec > throughputThresholdPages && !throughputThresholdExceeded) {
-          LOG.info("Exceding " + Integer.toString(throughputThresholdPages) + " pages/second");
+        // Have we reached the threshold of pages/second and threshold was not
+        // yet exceeded
+        if (pagesLastSec > throughputThresholdPages
+            && !throughputThresholdExceeded) {
+          LOG.info("Exceding " + Integer.toString(throughputThresholdPages)
+              + " pages/second");
           throughputThresholdExceeded = true;
         }
 
         // Check if we're dropping below the threshold
-        if (throughputThresholdExceeded && pagesLastSec < throughputThresholdPages) {
+        if (throughputThresholdExceeded
+            && pagesLastSec < throughputThresholdPages) {
           throughputThresholdNumRetries++;
-          LOG.warn(Integer.toString(throughputThresholdNumRetries) + ": dropping below configured threshold of " + Integer.toString(throughputThresholdPages) + " pages per second");
+          LOG.warn(Integer.toString(throughputThresholdNumRetries)
+              + ": dropping below configured threshold of "
+              + Integer.toString(throughputThresholdPages)
+              + " pages per second");
 
           // Quit if we dropped below threshold too many times
           if (throughputThresholdNumRetries == throughputThresholdMaxRetries) {
@@ -1131,11 +1215,13 @@
             // Disable the threshold checker
             throughputThresholdPages = -1;
 
-            // Empty the queues cleanly and get number of items that were dropped
+            // Empty the queues cleanly and get number of items that were
+            // dropped
             int hitByThrougputThreshold = fetchQueues.emptyQueues();
 
-            if (hitByThrougputThreshold != 0) reporter.incrCounter("FetcherStatus",
-              "hitByThrougputThreshold", hitByThrougputThreshold);
+            if (hitByThrougputThreshold != 0)
+              reporter.incrCounter("FetcherStatus", "hitByThrougputThreshold",
+                  hitByThrougputThreshold);
           }
         }
       }
@@ -1143,25 +1229,25 @@
       // check timelimit
       if (!feeder.isAlive()) {
         int hitByTimeLimit = fetchQueues.checkTimelimit();
-        if (hitByTimeLimit != 0) reporter.incrCounter("FetcherStatus",
-            "hitByTimeLimit", hitByTimeLimit);
+        if (hitByTimeLimit != 0)
+          reporter.incrCounter("FetcherStatus", "hitByTimeLimit",
+              hitByTimeLimit);
       }
-      
+
       // some requests seem to hang, despite all intentions
       if ((System.currentTimeMillis() - lastRequestStart.get()) > timeout) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Aborting with "+activeThreads+" hung threads.");
+          LOG.warn("Aborting with " + activeThreads + " hung threads.");
         }
         return;
       }
 
     } while (activeThreads.get() > 0);
     LOG.info("-activeThreads=" + activeThreads);
-    
+
   }
 
-  public void fetch(Path segment, int threads)
-    throws IOException {
+  public void fetch(Path segment, int threads) throws IOException {
 
     checkConfiguration();
 
@@ -1181,7 +1267,7 @@
       LOG.info("Fetcher Timelimit set for : " + timelimit);
       getConf().setLong("fetcher.timelimit", timelimit);
     }
-        
+
     JobConf job = new NutchJob(getConf());
     job.setJobName("fetch " + segment);
 
@@ -1191,7 +1277,8 @@
     // for politeness, don't permit parallel execution of a single task
     job.setSpeculativeExecution(false);
 
-    FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+    FileInputFormat.addInputPath(job, new Path(segment,
+        CrawlDatum.GENERATE_DIR_NAME));
     job.setInputFormat(InputFormat.class);
 
     job.setMapRunnerClass(Fetcher.class);
@@ -1204,16 +1291,16 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("Fetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Fetcher: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-
   /** Run the fetcher. */
   public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Fetcher(), args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
 
     String usage = "Usage: Fetcher <segment> [-threads n]";
@@ -1222,15 +1309,15 @@
       System.err.println(usage);
       return -1;
     }
-      
+
     Path segment = new Path(args[0]);
 
     int threads = getConf().getInt("fetcher.threads.fetch", 10);
     boolean parsing = false;
 
-    for (int i = 1; i < args.length; i++) {       // parse command line
-      if (args[i].equals("-threads")) {           // found -threads option
-        threads =  Integer.parseInt(args[++i]);
+    for (int i = 1; i < args.length; i++) { // parse command line
+      if (args[i].equals("-threads")) { // found -threads option
+        threads = Integer.parseInt(args[++i]);
       }
     }
 
Index: src/java/org/apache/nutch/fetcher/FetcherOutput.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherOutput.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/FetcherOutput.java	(working copy)
@@ -30,10 +30,10 @@
   private Content content;
   private ParseImpl parse;
 
-  public FetcherOutput() {}
+  public FetcherOutput() {
+  }
 
-  public FetcherOutput(CrawlDatum crawlDatum, Content content,
-                       ParseImpl parse) {
+  public FetcherOutput(CrawlDatum crawlDatum, Content content, ParseImpl parse) {
     this.crawlDatum = crawlDatum;
     this.content = content;
     this.parse = parse;
@@ -59,22 +59,29 @@
     }
   }
 
-  public CrawlDatum getCrawlDatum() { return crawlDatum; }
-  public Content getContent() { return content; }
-  public ParseImpl getParse() { return parse; }
+  public CrawlDatum getCrawlDatum() {
+    return crawlDatum;
+  }
 
+  public Content getContent() {
+    return content;
+  }
+
+  public ParseImpl getParse() {
+    return parse;
+  }
+
   public boolean equals(Object o) {
     if (!(o instanceof FetcherOutput))
       return false;
-    FetcherOutput other = (FetcherOutput)o;
-    return
-      this.crawlDatum.equals(other.crawlDatum) &&
-      this.content.equals(other.content);
+    FetcherOutput other = (FetcherOutput) o;
+    return this.crawlDatum.equals(other.crawlDatum)
+        && this.content.equals(other.content);
   }
 
   public String toString() {
     StringBuffer buffer = new StringBuffer();
-    buffer.append("CrawlDatum: " + crawlDatum+"\n" );
+    buffer.append("CrawlDatum: " + crawlDatum + "\n");
     return buffer.toString();
   }
 
Index: src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java	(working copy)
@@ -48,74 +48,68 @@
   public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
     Path out = FileOutputFormat.getOutputPath(job);
     if ((out == null) && (job.getNumReduceTasks() != 0)) {
-    	throw new InvalidJobConfException(
-    			"Output directory not set in JobConf.");
+      throw new InvalidJobConfException("Output directory not set in JobConf.");
     }
     if (fs == null) {
-    	fs = out.getFileSystem(job);
+      fs = out.getFileSystem(job);
     }
     if (fs.exists(new Path(out, CrawlDatum.FETCH_DIR_NAME)))
-    	throw new IOException("Segment already fetched!");
+      throw new IOException("Segment already fetched!");
   }
 
   public RecordWriter<Text, NutchWritable> getRecordWriter(final FileSystem fs,
-                                      final JobConf job,
-                                      final String name,
-                                      final Progressable progress) throws IOException {
+      final JobConf job, final String name, final Progressable progress)
+      throws IOException {
 
     Path out = FileOutputFormat.getOutputPath(job);
-    final Path fetch =
-      new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
-    final Path content =
-      new Path(new Path(out, Content.DIR_NAME), name);
-    
-    final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);
+    final Path fetch = new Path(new Path(out, CrawlDatum.FETCH_DIR_NAME), name);
+    final Path content = new Path(new Path(out, Content.DIR_NAME), name);
 
-    final MapFile.Writer fetchOut =
-      new MapFile.Writer(job, fs, fetch.toString(), Text.class, CrawlDatum.class,
-          compType, progress);
-    
+    final CompressionType compType = SequenceFileOutputFormat
+        .getOutputCompressionType(job);
+
+    final MapFile.Writer fetchOut = new MapFile.Writer(job, fs,
+        fetch.toString(), Text.class, CrawlDatum.class, compType, progress);
+
     return new RecordWriter<Text, NutchWritable>() {
-        private MapFile.Writer contentOut;
-        private RecordWriter<Text, Parse> parseOut;
+      private MapFile.Writer contentOut;
+      private RecordWriter<Text, Parse> parseOut;
 
-        {
-          if (Fetcher.isStoringContent(job)) {
-            contentOut = new MapFile.Writer(job, fs, content.toString(),
-                                            Text.class, Content.class,
-                                            compType, progress);
-          }
+      {
+        if (Fetcher.isStoringContent(job)) {
+          contentOut = new MapFile.Writer(job, fs, content.toString(),
+              Text.class, Content.class, compType, progress);
+        }
 
-          if (Fetcher.isParsing(job)) {
-            parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name, progress);
-          }
+        if (Fetcher.isParsing(job)) {
+          parseOut = new ParseOutputFormat().getRecordWriter(fs, job, name,
+              progress);
         }
+      }
 
-        public void write(Text key, NutchWritable value)
-          throws IOException {
+      public void write(Text key, NutchWritable value) throws IOException {
 
-          Writable w = value.get();
-          
-          if (w instanceof CrawlDatum)
-            fetchOut.append(key, w);
-          else if (w instanceof Content)
-            contentOut.append(key, w);
-          else if (w instanceof Parse)
-            parseOut.write(key, (Parse)w);
-        }
+        Writable w = value.get();
 
-        public void close(Reporter reporter) throws IOException {
-          fetchOut.close();
-          if (contentOut != null) {
-            contentOut.close();
-          }
-          if (parseOut != null) {
-            parseOut.close(reporter);
-          }
+        if (w instanceof CrawlDatum)
+          fetchOut.append(key, w);
+        else if (w instanceof Content)
+          contentOut.append(key, w);
+        else if (w instanceof Parse)
+          parseOut.write(key, (Parse) w);
+      }
+
+      public void close(Reporter reporter) throws IOException {
+        fetchOut.close();
+        if (contentOut != null) {
+          contentOut.close();
         }
+        if (parseOut != null) {
+          parseOut.close(reporter);
+        }
+      }
 
-      };
+    };
 
-  }      
+  }
 }
-
Index: src/java/org/apache/nutch/metadata/Metadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/Metadata.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/Metadata.java	(working copy)
@@ -27,23 +27,21 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-
 /**
  * A multi-valued metadata container.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  */
-public class Metadata implements Writable, CreativeCommons,
-DublinCore, HttpHeaders, Nutch, Office, Feed {
+public class Metadata implements Writable, CreativeCommons, DublinCore,
+    HttpHeaders, Nutch, Office, Feed {
 
   /**
    * A map of all metadata attributes.
    */
   private Map<String, String[]> metadata = null;
 
-
   /**
    * Constructs a new, empty metadata.
    */
@@ -53,9 +51,10 @@
 
   /**
    * Returns true if named value is multivalued.
-   * @param name name of metadata
-   * @return true is named value is multivalued, false if single
-   * value or null
+   * 
+   * @param name
+   *          name of metadata
+   * @return true if named value is multivalued, false if single value or null
    */
   public boolean isMultiValued(final String name) {
     return metadata.get(name) != null && metadata.get(name).length > 1;
@@ -63,6 +62,7 @@
 
   /**
    * Returns an array of the names contained in the metadata.
+   * 
    * @return Metadata names
    */
   public String[] names() {
@@ -70,11 +70,11 @@
   }
 
   /**
-   * Get the value associated to a metadata name.
-   * If many values are assiociated to the specified name, then the first
-   * one is returned.
-   *
-   * @param name of the metadata.
+   * Get the value associated to a metadata name. If many values are associated
+   * to the specified name, then the first one is returned.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the value associated to the specified metadata name.
    */
   public String get(final String name) {
@@ -88,13 +88,15 @@
 
   /**
    * Get the values associated to a metadata name.
-   * @param name of the metadata.
+   * 
+   * @param name
+   *          of the metadata.
    * @return the values associated to a metadata name.
    */
   public String[] getValues(final String name) {
     return _getValues(name);
   }
-  
+
   private String[] _getValues(final String name) {
     String[] values = metadata.get(name);
     if (values == null) {
@@ -104,12 +106,13 @@
   }
 
   /**
-   * Add a metadata name/value mapping.
-   * Add the specified value to the list of values associated to the
-   * specified metadata name.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Add a metadata name/value mapping. Add the specified value to the list of
+   * values associated to the specified metadata name.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void add(final String name, final String value) {
     String[] values = metadata.get(name);
@@ -125,31 +128,37 @@
 
   /**
    * Copy All key-value pairs from properties.
-   * @param properties properties to copy from
+   * 
+   * @param properties
+   *          properties to copy from
    */
   public void setAll(Properties properties) {
     Enumeration names = properties.propertyNames();
     while (names.hasMoreElements()) {
       String name = (String) names.nextElement();
-      metadata.put(name, new String[]{properties.getProperty(name)});
+      metadata.put(name, new String[] { properties.getProperty(name) });
     }
   }
 
   /**
-   * Set metadata name/value.
-   * Associate the specified value to the specified metadata name. If some
-   * previous values were associated to this name, they are removed.
-   *
-   * @param name the metadata name.
-   * @param value the metadata value.
+   * Set metadata name/value. Associate the specified value to the specified
+   * metadata name. If some previous values were associated to this name, they
+   * are removed.
+   * 
+   * @param name
+   *          the metadata name.
+   * @param value
+   *          the metadata value.
    */
   public void set(String name, String value) {
-    metadata.put(name, new String[]{value});
+    metadata.put(name, new String[] { value });
   }
 
   /**
    * Remove a metadata and all its associated values.
-   * @param name metadata name to remove
+   * 
+   * @param name
+   *          metadata name to remove
    */
   public void remove(String name) {
     metadata.remove(name);
@@ -157,12 +166,13 @@
 
   /**
    * Returns the number of metadata names in this metadata.
+   * 
    * @return number of metadata names
    */
   public int size() {
     return metadata.size();
   }
-  
+
   /** Remove all mappings from metadata. */
   public void clear() {
     metadata.clear();
@@ -170,7 +180,9 @@
 
   public boolean equals(Object o) {
 
-    if (o == null) { return false; }
+    if (o == null) {
+      return false;
+    }
 
     Metadata other = null;
     try {
@@ -179,7 +191,9 @@
       return false;
     }
 
-    if (other.size() != size()) { return false; }
+    if (other.size() != size()) {
+      return false;
+    }
 
     String[] names = names();
     for (int i = 0; i < names.length; i++) {
@@ -203,10 +217,7 @@
     for (int i = 0; i < names.length; i++) {
       String[] values = _getValues(names[i]);
       for (int j = 0; j < values.length; j++) {
-        buf.append(names[i])
-           .append("=")
-           .append(values[j])
-           .append(" ");
+        buf.append(names[i]).append("=").append(values[j]).append(" ");
       }
     }
     return buf.toString();
Index: src/java/org/apache/nutch/metadata/Nutch.java
===================================================================
--- src/java/org/apache/nutch/metadata/Nutch.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/Nutch.java	(working copy)
@@ -18,21 +18,18 @@
 
 import org.apache.hadoop.io.Text;
 
-
 /**
  * A collection of Nutch internal metadata constants.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Nutch {
-  
-  public static final String ORIGINAL_CHAR_ENCODING =
-          "OriginalCharEncoding";
-  
-  public static final String CHAR_ENCODING_FOR_CONVERSION =
-          "CharEncodingForConversion";
 
+  public static final String ORIGINAL_CHAR_ENCODING = "OriginalCharEncoding";
+
+  public static final String CHAR_ENCODING_FOR_CONVERSION = "CharEncodingForConversion";
+
   public static final String SIGNATURE_KEY = "nutch.content.digest";
 
   public static final String SEGMENT_NAME_KEY = "nutch.segment.name";
@@ -41,17 +38,22 @@
 
   public static final String GENERATE_TIME_KEY = "_ngt_";
 
-  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(GENERATE_TIME_KEY);
+  public static final Text WRITABLE_GENERATE_TIME_KEY = new Text(
+      GENERATE_TIME_KEY);
 
   public static final String PROTO_STATUS_KEY = "_pst_";
 
-  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(PROTO_STATUS_KEY);
-  
+  public static final Text WRITABLE_PROTO_STATUS_KEY = new Text(
+      PROTO_STATUS_KEY);
+
   public static final String FETCH_TIME_KEY = "_ftk_";
-  
+
   public static final String FETCH_STATUS_KEY = "_fst_";
 
-  /** Sites may request that search engines don't provide access to cached documents. */
+  /**
+   * Sites may request that search engines don't provide access to cached
+   * documents.
+   */
   public static final String CACHING_FORBIDDEN_KEY = "caching.forbidden";
 
   /** Show both original forbidden content and summaries (default). */
Index: src/java/org/apache/nutch/metadata/DublinCore.java
===================================================================
--- src/java/org/apache/nutch/metadata/DublinCore.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/DublinCore.java	(working copy)
@@ -16,149 +16,146 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Dublin Core metadata names.
- *
- * @see <a href="http://dublincore.org">dublincore.org</a> 
- *
+ * 
+ * @see <a href="http://dublincore.org">dublincore.org</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface DublinCore {
-  
-    
+
   /**
-   * Typically, Format may include the media-type or dimensions of the
-   * resource. Format may be used to determine the software, hardware or other
-   * equipment needed to display or operate the resource. Examples of
-   * dimensions include size and duration. Recommended best practice is to
-   * select a value from a controlled vocabulary (for example, the list of
-   * Internet Media Types [MIME] defining computer media formats).
+   * Typically, Format may include the media-type or dimensions of the resource.
+   * Format may be used to determine the software, hardware or other equipment
+   * needed to display or operate the resource. Examples of dimensions include
+   * size and duration. Recommended best practice is to select a value from a
+   * controlled vocabulary (for example, the list of Internet Media Types [MIME]
+   * defining computer media formats).
    */
   public static final String FORMAT = "format";
-  
+
   /**
-   * Recommended best practice is to identify the resource by means of a
-   * string or number conforming to a formal identification system. Example
-   * formal identification systems include the Uniform Resource Identifier
-   * (URI) (including the Uniform Resource Locator (URL)), the Digital Object
+   * Recommended best practice is to identify the resource by means of a string
+   * or number conforming to a formal identification system. Example formal
+   * identification systems include the Uniform Resource Identifier (URI)
+   * (including the Uniform Resource Locator (URL)), the Digital Object
    * Identifier (DOI) and the International Standard Book Number (ISBN).
    */
   public static final String IDENTIFIER = "identifier";
-  
+
   /**
    * Date on which the resource was changed.
    */
   public static final String MODIFIED = "modified";
-  
+
   /**
    * An entity responsible for making contributions to the content of the
-   * resource. Examples of a Contributor include a person, an organisation, or
-   * a service. Typically, the name of a Contributor should be used to
-   * indicate the entity.
+   * resource. Examples of a Contributor include a person, an organisation, or a
+   * service. Typically, the name of a Contributor should be used to indicate
+   * the entity.
    */
   public static final String CONTRIBUTOR = "contributor";
-  
+
   /**
-   * The extent or scope of the content of the resource. Coverage will
-   * typically include spatial location (a place name or geographic
-   * coordinates), temporal period (a period label, date, or date range) or
-   * jurisdiction (such as a named administrative entity). Recommended best
-   * practice is to select a value from a controlled vocabulary (for example,
-   * the Thesaurus of Geographic Names [TGN]) and that, where appropriate,
-   * named places or time periods be used in preference to numeric identifiers
-   * such as sets of coordinates or date ranges.
+   * The extent or scope of the content of the resource. Coverage will typically
+   * include spatial location (a place name or geographic coordinates), temporal
+   * period (a period label, date, or date range) or jurisdiction (such as a
+   * named administrative entity). Recommended best practice is to select a
+   * value from a controlled vocabulary (for example, the Thesaurus of
+   * Geographic Names [TGN]) and that, where appropriate, named places or time
+   * periods be used in preference to numeric identifiers such as sets of
+   * coordinates or date ranges.
    */
   public static final String COVERAGE = "coverage";
-  
+
   /**
    * An entity primarily responsible for making the content of the resource.
    * Examples of a Creator include a person, an organisation, or a service.
    * Typically, the name of a Creator should be used to indicate the entity.
    */
   public static final String CREATOR = "creator";
-  
+
   /**
    * A date associated with an event in the life cycle of the resource.
-   * Typically, Date will be associated with the creation or availability of
-   * the resource. Recommended best practice for encoding the date value is
-   * defined in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD
-   * format.
+   * Typically, Date will be associated with the creation or availability of the
+   * resource. Recommended best practice for encoding the date value is defined
+   * in a profile of ISO 8601 [W3CDTF] and follows the YYYY-MM-DD format.
    */
   public static final String DATE = "date";
-  
+
   /**
    * An account of the content of the resource. Description may include but is
    * not limited to: an abstract, table of contents, reference to a graphical
    * representation of content or a free-text account of the content.
    */
   public static final String DESCRIPTION = "description";
-  
+
   /**
    * A language of the intellectual content of the resource. Recommended best
    * practice is to use RFC 3066 [RFC3066], which, in conjunction with ISO 639
-   * [ISO639], defines two- and three-letter primary language tags with
-   * optional subtags. Examples include "en" or "eng" for English, "akk" for
-   * Akkadian, and "en-GB" for English used in the United Kingdom.
+   * [ISO639], defines two- and three-letter primary language tags with optional
+   * subtags. Examples include "en" or "eng" for English, "akk" for Akkadian,
+   * and "en-GB" for English used in the United Kingdom.
    */
   public static final String LANGUAGE = "language";
-  
+
   /**
    * An entity responsible for making the resource available. Examples of a
    * Publisher include a person, an organisation, or a service. Typically, the
    * name of a Publisher should be used to indicate the entity.
    */
   public static final String PUBLISHER = "publisher";
-  
+
   /**
    * A reference to a related resource. Recommended best practice is to
    * reference the resource by means of a string or number conforming to a
    * formal identification system.
    */
   public static final String RELATION = "relation";
-  
+
   /**
-   * Information about rights held in and over the resource. Typically, a
-   * Rights element will contain a rights management statement for the
-   * resource, or reference a service providing such information. Rights
-   * information often encompasses Intellectual Property Rights (IPR),
-   * Copyright, and various Property Rights. If the Rights element is absent,
-   * no assumptions can be made about the status of these and other rights
-   * with respect to the resource.
+   * Information about rights held in and over the resource. Typically, a Rights
+   * element will contain a rights management statement for the resource, or
+   * reference a service providing such information. Rights information often
+   * encompasses Intellectual Property Rights (IPR), Copyright, and various
+   * Property Rights. If the Rights element is absent, no assumptions can be
+   * made about the status of these and other rights with respect to the
+   * resource.
    */
   public static final String RIGHTS = "rights";
-  
+
   /**
    * A reference to a resource from which the present resource is derived. The
    * present resource may be derived from the Source resource in whole or in
-   * part. Recommended best practice is to reference the resource by means of
-   * a string or number conforming to a formal identification system.
+   * part. Recommended best practice is to reference the resource by means of a
+   * string or number conforming to a formal identification system.
    */
   public static final String SOURCE = "source";
-  
+
   /**
    * The topic of the content of the resource. Typically, a Subject will be
-   * expressed as keywords, key phrases or classification codes that describe
-   * a topic of the resource. Recommended best practice is to select a value
-   * from a controlled vocabulary or formal classification scheme.
+   * expressed as keywords, key phrases or classification codes that describe a
+   * topic of the resource. Recommended best practice is to select a value from
+   * a controlled vocabulary or formal classification scheme.
    */
   public static final String SUBJECT = "subject";
-  
+
   /**
    * A name given to the resource. Typically, a Title will be a name by which
    * the resource is formally known.
    */
   public static final String TITLE = "title";
-  
+
   /**
    * The nature or genre of the content of the resource. Type includes terms
-   * describing general categories, functions, genres, or aggregation levels
-   * for content. Recommended best practice is to select a value from a
-   * controlled vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]).
-   * To describe the physical or digital manifestation of the resource, use
-   * the Format element.
+   * describing general categories, functions, genres, or aggregation levels for
+   * content. Recommended best practice is to select a value from a controlled
+   * vocabulary (for example, the DCMI Type Vocabulary [DCMITYPE]). To describe
+   * the physical or digital manifestation of the resource, use the Format
+   * element.
    */
   public static final String TYPE = "type";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/MetaWrapper.java
===================================================================
--- src/java/org/apache/nutch/metadata/MetaWrapper.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/MetaWrapper.java	(working copy)
@@ -28,28 +28,29 @@
 /**
  * This is a simple decorator that adds metadata to any Writable-s that can be
  * serialized by <tt>NutchWritable</tt>. This is useful when data needs to be
- * temporarily enriched during processing, but this
- * temporary metadata doesn't need to be permanently stored after the job is done.
+ * temporarily enriched during processing, but this temporary metadata doesn't
+ * need to be permanently stored after the job is done.
  * 
  * @author Andrzej Bialecki
  */
 public class MetaWrapper extends NutchWritable {
   private Metadata metadata;
-  
+
   public MetaWrapper() {
     super();
     metadata = new Metadata();
   }
-  
+
   public MetaWrapper(Writable instance, Configuration conf) {
     super(instance);
     metadata = new Metadata();
     setConf(conf);
   }
-  
+
   public MetaWrapper(Metadata metadata, Writable instance, Configuration conf) {
     super(instance);
-    if (metadata == null) metadata = new Metadata();
+    if (metadata == null)
+      metadata = new Metadata();
     this.metadata = metadata;
     setConf(conf);
   }
@@ -60,43 +61,52 @@
   public Metadata getMetadata() {
     return metadata;
   }
-  
+
   /**
-   * Add metadata. See {@link Metadata#add(String, String)} for more information.
-   * @param name metadata name
-   * @param value metadata value
+   * Add metadata. See {@link Metadata#add(String, String)} for more
+   * information.
+   * 
+   * @param name
+   *          metadata name
+   * @param value
+   *          metadata value
    */
   public void addMeta(String name, String value) {
     metadata.add(name, value);
   }
-  
+
   /**
-   * Set metadata. See {@link Metadata#set(String, String)} for more information.
+   * Set metadata. See {@link Metadata#set(String, String)} for more
+   * information.
+   * 
    * @param name
    * @param value
    */
   public void setMeta(String name, String value) {
     metadata.set(name, value);
   }
-  
+
   /**
    * Get metadata. See {@link Metadata#get(String)} for more information.
+   * 
    * @param name
    * @return metadata value
    */
   public String getMeta(String name) {
     return metadata.get(name);
   }
-  
+
   /**
-   * Get multiple metadata. See {@link Metadata#getValues(String)} for more information.
+   * Get multiple metadata. See {@link Metadata#getValues(String)} for more
+   * information.
+   * 
    * @param name
    * @return multiple values
    */
   public String[] getMetaValues(String name) {
     return metadata.getValues(name);
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     super.readFields(in);
     metadata = new Metadata();
Index: src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
===================================================================
--- src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java	(working copy)
@@ -33,7 +33,7 @@
 
   /**
    * Treshold divider.
-   *
+   * 
    * <code>threshold = searched.length() / TRESHOLD_DIVIDER;</code>
    */
   private static final int TRESHOLD_DIVIDER = 3;
@@ -52,7 +52,7 @@
 
     // Uses following array to fill the metanames index and the
     // metanames list.
-    Class[] spellthese = {HttpHeaders.class};
+    Class[] spellthese = { HttpHeaders.class };
 
     for (Class spellCheckedNames : spellthese) {
       for (Field field : spellCheckedNames.getFields()) {
@@ -73,7 +73,7 @@
 
   /**
    * Normalizes String.
-   *
+   * 
    * @param str
    *          the string to normalize
    * @return normalized String
@@ -102,7 +102,7 @@
    * </ul>
    * If no matching with a well-known metadata name is found, then the original
    * name is returned.
-   *
+   * 
    * @param name
    *          Name to normalize
    * @return normalized name
Index: src/java/org/apache/nutch/metadata/HttpHeaders.java
===================================================================
--- src/java/org/apache/nutch/metadata/HttpHeaders.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/HttpHeaders.java	(working copy)
@@ -16,34 +16,33 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of HTTP header names.
- *
- * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
- *      Protocol -- HTTP/1.1 (RFC 2616)</a>
- *
+ * 
+ * @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer Protocol
+ *      -- HTTP/1.1 (RFC 2616)</a>
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface HttpHeaders {
 
   public final static String CONTENT_ENCODING = "Content-Encoding";
-  
+
   public final static String CONTENT_LANGUAGE = "Content-Language";
 
   public final static String CONTENT_LENGTH = "Content-Length";
-  
+
   public final static String CONTENT_LOCATION = "Content-Location";
-  
+
   public static final String CONTENT_DISPOSITION = "Content-Disposition";
 
   public final static String CONTENT_MD5 = "Content-MD5";
-  
+
   public final static String CONTENT_TYPE = "Content-Type";
-  
+
   public final static String LAST_MODIFIED = "Last-Modified";
-  
+
   public final static String LOCATION = "Location";
 
 }
Index: src/java/org/apache/nutch/metadata/Office.java
===================================================================
--- src/java/org/apache/nutch/metadata/Office.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/Office.java	(working copy)
@@ -16,37 +16,36 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of <i>"Office"</i> documents properties names.
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface Office {
-    
+
   public static final String KEYWORDS = "Keywords";
-  
+
   public static final String COMMENTS = "Comments";
-  
+
   public static final String LAST_AUTHOR = "Last-Author";
-  
+
   public static final String APPLICATION_NAME = "Application-Name";
-  
+
   public static final String CHARACTER_COUNT = "Character Count";
-  
+
   public static final String LAST_PRINTED = "Last-Printed";
-  
+
   public static final String LAST_SAVED = "Last-Save-Date";
-  
+
   public static final String PAGE_COUNT = "Page-Count";
-  
+
   public static final String REVISION_NUMBER = "Revision-Number";
-  
+
   public static final String WORD_COUNT = "Word-Count";
-  
+
   public static final String TEMPLATE = "Template";
-  
+
   public static final String AUTHOR = "Author";
-  
+
 }
Index: src/java/org/apache/nutch/metadata/CreativeCommons.java
===================================================================
--- src/java/org/apache/nutch/metadata/CreativeCommons.java	(revision 1188252)
+++ src/java/org/apache/nutch/metadata/CreativeCommons.java	(working copy)
@@ -16,21 +16,20 @@
  */
 package org.apache.nutch.metadata;
 
-
 /**
  * A collection of Creative Commons properties names.
- *
+ * 
  * @see <a href="http://www.creativecommons.org/">creativecommons.org</a>
- *
+ * 
  * @author Chris Mattmann
  * @author J&eacute;r&ocirc;me Charron
  */
 public interface CreativeCommons {
-  
+
   public final static String LICENSE_URL = "License-Url";
-  
+
   public final static String LICENSE_LOCATION = "License-Location";
-  
+
   public final static String WORK_TYPE = "Work-Type";
-  
+
 }
Index: src/java/org/apache/nutch/tools/proxy/SegmentHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/SegmentHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/SegmentHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -52,42 +53,54 @@
  * XXX should turn this into a plugin?
  */
 public class SegmentHandler extends AbstractTestbedHandler {
-  private static final Logger LOG = LoggerFactory.getLogger(SegmentHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SegmentHandler.class);
   private Segment seg;
-  
-  private static HashMap<Integer,Integer> protoCodes = new HashMap<Integer,Integer>();
-  
+
+  private static HashMap<Integer, Integer> protoCodes = new HashMap<Integer, Integer>();
+
   static {
-    protoCodes.put(ProtocolStatus.ACCESS_DENIED, HttpServletResponse.SC_UNAUTHORIZED);
-    protoCodes.put(ProtocolStatus.BLOCKED, HttpServletResponse.SC_SERVICE_UNAVAILABLE);
-    protoCodes.put(ProtocolStatus.EXCEPTION, HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+    protoCodes.put(ProtocolStatus.ACCESS_DENIED,
+        HttpServletResponse.SC_UNAUTHORIZED);
+    protoCodes.put(ProtocolStatus.BLOCKED,
+        HttpServletResponse.SC_SERVICE_UNAVAILABLE);
+    protoCodes.put(ProtocolStatus.EXCEPTION,
+        HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
     protoCodes.put(ProtocolStatus.FAILED, HttpServletResponse.SC_BAD_REQUEST);
     protoCodes.put(ProtocolStatus.GONE, HttpServletResponse.SC_GONE);
-    protoCodes.put(ProtocolStatus.MOVED, HttpServletResponse.SC_MOVED_PERMANENTLY);
-    protoCodes.put(ProtocolStatus.NOTFETCHING, HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.MOVED,
+        HttpServletResponse.SC_MOVED_PERMANENTLY);
+    protoCodes.put(ProtocolStatus.NOTFETCHING,
+        HttpServletResponse.SC_BAD_REQUEST);
     protoCodes.put(ProtocolStatus.NOTFOUND, HttpServletResponse.SC_NOT_FOUND);
-    protoCodes.put(ProtocolStatus.NOTMODIFIED, HttpServletResponse.SC_NOT_MODIFIED);
-    protoCodes.put(ProtocolStatus.PROTO_NOT_FOUND, HttpServletResponse.SC_BAD_REQUEST);
-    protoCodes.put(ProtocolStatus.REDIR_EXCEEDED, HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.NOTMODIFIED,
+        HttpServletResponse.SC_NOT_MODIFIED);
+    protoCodes.put(ProtocolStatus.PROTO_NOT_FOUND,
+        HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.REDIR_EXCEEDED,
+        HttpServletResponse.SC_BAD_REQUEST);
     protoCodes.put(ProtocolStatus.RETRY, HttpServletResponse.SC_BAD_REQUEST);
-    protoCodes.put(ProtocolStatus.ROBOTS_DENIED, HttpServletResponse.SC_FORBIDDEN);
+    protoCodes.put(ProtocolStatus.ROBOTS_DENIED,
+        HttpServletResponse.SC_FORBIDDEN);
     protoCodes.put(ProtocolStatus.SUCCESS, HttpServletResponse.SC_OK);
-    protoCodes.put(ProtocolStatus.TEMP_MOVED, HttpServletResponse.SC_MOVED_TEMPORARILY);
-    protoCodes.put(ProtocolStatus.WOULDBLOCK, HttpServletResponse.SC_BAD_REQUEST);
+    protoCodes.put(ProtocolStatus.TEMP_MOVED,
+        HttpServletResponse.SC_MOVED_TEMPORARILY);
+    protoCodes.put(ProtocolStatus.WOULDBLOCK,
+        HttpServletResponse.SC_BAD_REQUEST);
   }
-  
+
   private static class SegmentPathFilter implements PathFilter {
     public static final SegmentPathFilter INSTANCE = new SegmentPathFilter();
-    
+
     @Override
     public boolean accept(Path p) {
       return p.getName().startsWith("part-");
     }
-    
+
   }
-  
+
   private static class Segment implements Closeable {
-    
+
     private static final Partitioner PARTITIONER = new HashPartitioner();
 
     private FileSystem fs;
@@ -101,7 +114,8 @@
     private MapFile.Reader[] crawl;
     private Configuration conf;
 
-    public Segment(FileSystem fs, Path segmentDir, Configuration conf) throws IOException {
+    public Segment(FileSystem fs, Path segmentDir, Configuration conf)
+        throws IOException {
       this.fs = fs;
       this.segmentDir = segmentDir;
       this.conf = conf;
@@ -112,43 +126,52 @@
         if (crawl == null)
           crawl = getReaders(CrawlDatum.FETCH_DIR_NAME);
       }
-      return (CrawlDatum)getEntry(crawl, url, new CrawlDatum());
+      return (CrawlDatum) getEntry(crawl, url, new CrawlDatum());
     }
-    
+
     public Content getContent(Text url) throws IOException {
       synchronized (cLock) {
         if (content == null)
           content = getReaders(Content.DIR_NAME);
       }
-      return (Content)getEntry(content, url, new Content());
+      return (Content) getEntry(content, url, new Content());
     }
 
     /** Open the output generated by this format. */
     private MapFile.Reader[] getReaders(String subDir) throws IOException {
       Path dir = new Path(segmentDir, subDir);
       FileSystem fs = dir.getFileSystem(conf);
-      Path[] names = FileUtil.stat2Paths(fs.listStatus(dir, SegmentPathFilter.INSTANCE));
+      Path[] names = FileUtil.stat2Paths(fs.listStatus(dir,
+          SegmentPathFilter.INSTANCE));
 
       // sort names, so that hash partitioning works
       Arrays.sort(names);
-      
+
       MapFile.Reader[] parts = new MapFile.Reader[names.length];
       for (int i = 0; i < names.length; i++) {
         parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
       }
       return parts;
     }
-    
-    private Writable getEntry(MapFile.Reader[] readers, Text url,
-                              Writable entry) throws IOException {
+
+    private Writable getEntry(MapFile.Reader[] readers, Text url, Writable entry)
+        throws IOException {
       return MapFileOutputFormat.getEntry(readers, PARTITIONER, url, entry);
     }
 
     public void close() throws IOException {
-      if (content != null) { closeReaders(content); }
-      if (parseText != null) { closeReaders(parseText); }
-      if (parseData != null) { closeReaders(parseData); }
-      if (crawl != null) { closeReaders(crawl); }
+      if (content != null) {
+        closeReaders(content);
+      }
+      if (parseText != null) {
+        closeReaders(parseText);
+      }
+      if (parseData != null) {
+        closeReaders(parseData);
+      }
+      if (crawl != null) {
+        closeReaders(crawl);
+      }
     }
 
     private void closeReaders(MapFile.Reader[] readers) throws IOException {
@@ -158,14 +181,14 @@
     }
 
   }
-  
+
   public SegmentHandler(Configuration conf, Path name) throws Exception {
     seg = new Segment(FileSystem.get(conf), name, conf);
   }
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       String uri = req.getUri().toString();
       LOG.info("URI: " + uri);
@@ -175,17 +198,18 @@
       if (cd != null) {
         addMyHeader(res, "Res", "found");
         LOG.info("-got " + cd.toString());
-        ProtocolStatus ps = (ProtocolStatus)cd.getMetaData().get(Nutch.WRITABLE_PROTO_STATUS_KEY);
+        ProtocolStatus ps = (ProtocolStatus) cd.getMetaData().get(
+            Nutch.WRITABLE_PROTO_STATUS_KEY);
         if (ps != null) {
           Integer TrCode = protoCodes.get(ps.getCode());
           if (TrCode != null) {
-            res.setStatus(TrCode.intValue());            
+            res.setStatus(TrCode.intValue());
           } else {
             res.setStatus(HttpServletResponse.SC_OK);
           }
           addMyHeader(res, "ProtocolStatus", ps.toString());
         } else {
-          res.setStatus(HttpServletResponse.SC_OK);          
+          res.setStatus(HttpServletResponse.SC_OK);
         }
         Content c = seg.getContent(url);
         if (c == null) { // missing content
Index: src/java/org/apache/nutch/tools/proxy/FakeHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/FakeHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -29,22 +30,20 @@
 public class FakeHandler extends AbstractTestbedHandler {
   Random r = new Random(1234567890L); // predictable
 
-  private static final String testA = 
-    "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n" + 
-    "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>" +
-    "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>" +
-    "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
-  private static final String testB =
-    "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>" +
-    "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>" +
-    "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
+  private static final String testA = "<html><body><h1>Internet Weather Forecast Accuracy</h1>\n"
+      + "<p>Weather forecasting is a secure and popular online presence, which is understandable. The weather affects most everyone's life, and the Internet can provide information on just about any location at any hour of the day or night. But how accurate is this information? How much can we trust it? Perhaps it is just my skeptical nature (or maybe the seeming unpredictability of nature), but I've never put much weight into weather forecasts - especially those made more than three days in advance. That skepticism progressed to a new high in the Summer of 2004, but I have only now done the research necessary to test the accuracy of online weather forecasts. First the story, then the data.</p>"
+      + "<h2>An Internet Weather Forecast Gone Terribly Awry</h2>"
+      + "<p>It was the Summer of 2004 and my wife and I were gearing up for a trip with another couple to Schlitterbahn in New Braunfels - one of the (if not the) best waterparks ever created. As a matter of course when embarking on a 2.5-hour drive to spend the day in a swimsuit, and given the tendency of the area for natural disasters, we checked the weather. The temperatures looked ideal and, most importantly, the chance of rain was a nice round goose egg.</p>";
+  private static final String testB = "<p>A couple of hours into our Schlitterbahn experience, we got on a bus to leave the 'old section' for the 'new section.' Along the way, clouds gathered and multiple claps of thunder sounded. 'So much for the 0% chance of rain,' I commented. By the time we got to our destination, lightning sightings had led to the slides and pools being evacuated and soon the rain began coming down in torrents - accompanied by voluminous lightning flashes. After at least a half an hour the downpour had subsided, but the lightning showed no sign of letting up, so we began heading back to our vehicles. A hundred yards into the parking lot, we passing a tree that had apparently been split in two during the storm (whether by lightning or wind, I'm not sure). Not but a few yards later, there was a distinct thud and the husband of the couple accompanying us cried out as a near racquetball sized hunk of ice rebounded off of his head and onto the concrete. Soon, similarly sized hail was falling all around us as everyone scampered for cover. Some cowered under overturned trashcans while others were more fortunate and made it indoors.</p>"
+      + "<p>The hail, rain and lightning eventually subsided, but the most alarming news was waiting on cell phone voicemail. A friend who lived in the area had called frantically, knowing we were at the park, as the local news was reporting multiple people had been by struck by lightning at Schlitterbahn during the storm.</p>"
+      + "<p>'So much for the 0% chance of rain,' I repeated.</p></body></html>";
 
   @Override
-  public void handle(Request req, HttpServletResponse res, String target, 
-          int dispatch) throws IOException, ServletException {
+  public void handle(Request req, HttpServletResponse res, String target,
+      int dispatch) throws IOException, ServletException {
     HttpURI u = req.getUri();
     String uri = u.toString();
-    //System.err.println("-faking " + uri.toString());
+    // System.err.println("-faking " + uri.toString());
     addMyHeader(res, "URI", uri);
     // don't pass it down the chain
     req.setHandled(true);
@@ -68,8 +67,10 @@
         base = u.getPath();
       }
       String prefix = u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) base += ":" + u.getPort();
-      if (!base.startsWith("/")) prefix += "/";
+      if (u.getPort() != 80 && u.getPort() != -1)
+        base += ":" + u.getPort();
+      if (!base.startsWith("/"))
+        prefix += "/";
       prefix = prefix + base;
       for (int i = 0; i < 10; i++) {
         String link = "<p><a href='" + prefix;
@@ -82,18 +83,20 @@
       // fake a few links to random nonexistent hosts
       for (int i = 0; i < 5; i++) {
         int h = r.nextInt(1000000); // 1 mln hosts
-        String link = "<p><a href='http://www.fake-" + h + ".com/'>fake host " + h + "</a></p>\r\n";
+        String link = "<p><a href='http://www.fake-" + h + ".com/'>fake host "
+            + h + "</a></p>\r\n";
         os.write(link.getBytes());
       }
       // fake a link to the root URL
       String link = "<p><a href='" + u.getScheme() + "://" + u.getHost();
-      if (u.getPort() != 80 && u.getPort() != -1) link += ":" + u.getPort();
+      if (u.getPort() != 80 && u.getPort() != -1)
+        link += ":" + u.getPort();
       link += "/'>site " + u.getHost() + "</a></p>\r\n";
       os.write(link.getBytes());
       os.write(testB.getBytes());
       res.flushBuffer();
     } catch (IOException ioe) {
-    }    
+    }
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -31,29 +32,33 @@
 import org.mortbay.jetty.Request;
 
 public class LogDebugHandler extends AbstractTestbedHandler implements Filter {
-  private static final Logger LOG = LoggerFactory.getLogger(LogDebugHandler.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(LogDebugHandler.class);
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
-    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n" + req.getConnection().getRequestFields());
+      int dispatch) throws IOException, ServletException {
+    LOG.info("-- " + req.getMethod() + " " + req.getUri().toString() + "\n"
+        + req.getConnection().getRequestFields());
   }
 
   @Override
   public void doFilter(ServletRequest req, ServletResponse res,
-          FilterChain chain) throws IOException, ServletException {
-    ((HttpServletResponse)res).addHeader("X-Handled-By", "AsyncProxyHandler");
-    ((HttpServletResponse)res).addHeader("X-TestbedHandlers", "AsyncProxyHandler");
+      FilterChain chain) throws IOException, ServletException {
+    ((HttpServletResponse) res).addHeader("X-Handled-By", "AsyncProxyHandler");
+    ((HttpServletResponse) res).addHeader("X-TestbedHandlers",
+        "AsyncProxyHandler");
     try {
       chain.doFilter(req, res);
     } catch (Throwable e) {
-      ((HttpServletResponse)res).sendError(HttpServletResponse.SC_BAD_REQUEST, e.toString());
+      ((HttpServletResponse) res).sendError(HttpServletResponse.SC_BAD_REQUEST,
+          e.toString());
     }
   }
 
   @Override
   public void init(FilterConfig arg0) throws ServletException {
     // TODO Auto-generated method stub
-    
+
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -27,13 +28,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     // don't pass it down the chain
     req.setHandled(true);
     res.addHeader("X-Handled-By", getClass().getSimpleName());
     addMyHeader(res, "URI", req.getUri().toString());
-    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: " +
-            req.getUri().toString());
+    res.sendError(HttpServletResponse.SC_NOT_FOUND, "Not found: "
+        + req.getUri().toString());
   }
 
 }
Index: src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -30,16 +31,17 @@
 
   @Override
   public void handle(String target, HttpServletRequest req,
-          HttpServletResponse res, int dispatch) throws IOException,
-          ServletException {
-    Request base_request = (req instanceof Request) ? (Request)req : HttpConnection.getCurrentConnection().getRequest();
+      HttpServletResponse res, int dispatch) throws IOException,
+      ServletException {
+    Request base_request = (req instanceof Request) ? (Request) req
+        : HttpConnection.getCurrentConnection().getRequest();
     res.addHeader("X-TestbedHandlers", this.getClass().getSimpleName());
     handle(base_request, res, target, dispatch);
   }
-  
-  public abstract void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException;
-  
+
+  public abstract void handle(Request req, HttpServletResponse res,
+      String target, int dispatch) throws IOException, ServletException;
+
   public void addMyHeader(HttpServletResponse res, String name, String value) {
     name = "X-" + this.getClass().getSimpleName() + "-" + name;
     res.addHeader(name, value);
Index: src/java/org/apache/nutch/tools/proxy/DelayHandler.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/DelayHandler.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -25,13 +26,13 @@
 import org.mortbay.jetty.Request;
 
 public class DelayHandler extends AbstractTestbedHandler {
-  
+
   public static final long DEFAULT_DELAY = 2000;
-  
+
   private int delay;
   private boolean random;
   private Random r;
-  
+
   public DelayHandler(int delay) {
     if (delay < 0) {
       delay = -delay;
@@ -43,13 +44,13 @@
 
   @Override
   public void handle(Request req, HttpServletResponse res, String target,
-          int dispatch) throws IOException, ServletException {
+      int dispatch) throws IOException, ServletException {
     try {
       int del = random ? r.nextInt(delay) : delay;
       Thread.sleep(del);
       addMyHeader(res, "Delay", String.valueOf(del));
     } catch (Exception e) {
-      
+
     }
   }
 }
Index: src/java/org/apache/nutch/tools/proxy/TestbedProxy.java
===================================================================
--- src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/proxy/TestbedProxy.java	(working copy)
@@ -1,4 +1,5 @@
 package org.apache.nutch.tools.proxy;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -44,17 +45,25 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
-      System.err.println("TestbedProxy [-seg <segment_name> | -segdir <segments>] [-port <nnn>] [-forward] [-fake] [-delay nnn] [-debug]");
-      System.err.println("-seg <segment_name>\tpath to a single segment (can be specified multiple times)");
-      System.err.println("-segdir <segments>\tpath to a parent directory of multiple segments (as above)");
-      System.err.println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
-      System.err.println("-forward\tif specified, requests to all unknown urls will be passed to");
-      System.err.println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
-      System.err.println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
-      System.err.println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
+      System.err
+          .println("TestbedProxy [-seg <segment_name> | -segdir <segments>] [-port <nnn>] [-forward] [-fake] [-delay nnn] [-debug]");
+      System.err
+          .println("-seg <segment_name>\tpath to a single segment (can be specified multiple times)");
+      System.err
+          .println("-segdir <segments>\tpath to a parent directory of multiple segments (as above)");
+      System.err
+          .println("-port <nnn>\trun the proxy on port <nnn> (special permissions may be needed for ports < 1024)");
+      System.err
+          .println("-forward\tif specified, requests to all unknown urls will be passed to");
+      System.err
+          .println("\t\toriginal servers. If false (default) unknown urls generate 404 Not Found.");
+      System.err
+          .println("-delay\tdelay every response by nnn seconds. If delay is negative use a random value up to nnn");
+      System.err
+          .println("-fake\tif specified, requests to all unknown urls will succeed with fake content");
       System.exit(-1);
     }
-    
+
     Configuration conf = NutchConfiguration.create();
     int port = conf.getInt("segment.proxy.port", 8181);
     boolean forward = false;
@@ -62,7 +71,7 @@
     boolean delay = false;
     boolean debug = false;
     int delayVal = 0;
-    
+
     HashSet<Path> segs = new HashSet<Path>();
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-segdir")) {
@@ -88,28 +97,30 @@
         System.exit(-1);
       }
     }
-    
+
     // Create the server
     Server server = new Server();
     SocketConnector connector = new SocketConnector();
     connector.setPort(port);
     connector.setResolveNames(false);
     server.addConnector(connector);
-    
+
     // create a list of handlers
     HandlerList list = new HandlerList();
     server.addHandler(list);
-    
+
     if (debug) {
       LOG.info("* Added debug handler.");
       list.addHandler(new LogDebugHandler());
     }
- 
+
     if (delay) {
-      LOG.info("* Added delay handler: " + (delayVal < 0 ? "random delay up to " + (-delayVal) : "constant delay of " + delayVal));
+      LOG.info("* Added delay handler: "
+          + (delayVal < 0 ? "random delay up to " + (-delayVal)
+              : "constant delay of " + delayVal));
       list.addHandler(new DelayHandler(delayVal));
     }
-    
+
     // XXX alternatively, we can add the DispatchHandler as the first one,
     // XXX to activate handler plugins and redirect requests to appropriate
     // XXX handlers ... Here we always load these handlers
@@ -122,7 +133,8 @@
         list.addHandler(segment);
         LOG.info("* Added segment handler for: " + p);
       } catch (Exception e) {
-        LOG.warn("Skipping segment '" + p + "': " + StringUtils.stringifyException(e));
+        LOG.warn("Skipping segment '" + p + "': "
+            + StringUtils.stringifyException(e));
       }
     }
     if (forward) {
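For reference, the flags documented in the usage message above compose as in the following invocation (segment directory, port and delay value are illustrative placeholders): serve all segments found under crawl/segments on port 8181, delay each response by a random interval of up to 5 seconds, and forward requests for unknown URLs to their original servers:

  TestbedProxy -segdir crawl/segments -port 8181 -delay -5 -forward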
Index: src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/arc/ArcRecordReader.java	(working copy)
@@ -34,23 +34,29 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * <p>The <code>ArchRecordReader</code> class provides a record reader which 
- * reads records from arc files.</p>
+ * <p>
+ * The <code>ArcRecordReader</code> class provides a record reader which reads
+ * records from arc files.
+ * </p>
  * 
- * <p>Arc files are essentially tars of gzips.  Each record in an arc file is
- * a compressed gzip.  Multiple records are concatenated together to form a
- * complete arc.  For more information on the arc file format see
- * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.</p>
+ * <p>
+ * Arc files are essentially tars of gzips. Each record in an arc file is a
+ * compressed gzip. Multiple records are concatenated together to form a
+ * complete arc. For more information on the arc file format see
+ * {@link http://www.archive.org/web/researcher/ArcFileFormat.php}.
+ * </p>
  * 
- * <p>Arc files are used by the internet archive and grub projects.</p>
+ * <p>
+ * Arc files are used by the internet archive and grub projects.
+ * </p>
  * 
  * @see http://www.archive.org/
  * @see http://www.grub.org/
  */
-public class ArcRecordReader
-  implements RecordReader<Text, BytesWritable> {
+public class ArcRecordReader implements RecordReader<Text, BytesWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcRecordReader.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcRecordReader.class);
 
   protected Configuration conf;
   protected long splitStart = 0;
@@ -60,30 +66,32 @@
   protected long fileLen = 0;
   protected FSDataInputStream in;
 
-  private static byte[] MAGIC = {(byte)0x1F, (byte)0x8B};
+  private static byte[] MAGIC = { (byte) 0x1F, (byte) 0x8B };
 
   /**
-   * <p>Returns true if the byte array passed matches the gzip header magic 
-   * number.</p>
+   * <p>
+   * Returns true if the byte array passed matches the gzip header magic number.
+   * </p>
    * 
-   * @param input The byte array to check.
+   * @param input
+   *          The byte array to check.
    * 
    * @return True if the byte array matches the gzip header magic number.
    */
   public static boolean isMagic(byte[] input) {
 
-	// check for null and incorrect length
+    // check for null and incorrect length
     if (input == null || input.length != MAGIC.length) {
       return false;
     }
-    
+
     // check byte by byte
     for (int i = 0; i < MAGIC.length; i++) {
       if (MAGIC[i] != input[i]) {
         return false;
       }
     }
-    
+
     // must match
     return true;
   }
@@ -91,13 +99,16 @@
   /**
    * Constructor that sets the configuration and file split.
    * 
-   * @param conf The job configuration.
-   * @param split The file split to read from.
+   * @param conf
+   *          The job configuration.
+   * @param split
+   *          The file split to read from.
    * 
-   * @throws IOException  If an IO error occurs while initializing file split.
+   * @throws IOException
+   *           If an IO error occurs while initializing file split.
    */
   public ArcRecordReader(Configuration conf, FileSplit split)
-    throws IOException {
+      throws IOException {
 
     Path path = split.getPath();
     FileSystem fs = path.getFileSystem(conf);
@@ -113,8 +124,7 @@
   /**
    * Closes the record reader resources.
    */
-  public void close()
-    throws IOException {
+  public void close() throws IOException {
     this.in.close();
   }
 
@@ -122,14 +132,15 @@
    * Creates a new instance of the <code>Text</code> object for the key.
    */
   public Text createKey() {
-    return (Text)ReflectionUtils.newInstance(Text.class, conf);
+    return (Text) ReflectionUtils.newInstance(Text.class, conf);
   }
 
   /**
    * Creates a new instance of the <code>BytesWritable</code> object for the key
    */
   public BytesWritable createValue() {
-    return (BytesWritable)ReflectionUtils.newInstance(BytesWritable.class, conf);
+    return (BytesWritable) ReflectionUtils.newInstance(BytesWritable.class,
+        conf);
   }
 
   /**
@@ -137,63 +148,64 @@
    * 
    * @return The long of the current position in the file.
    */
-  public long getPos()
-    throws IOException {
+  public long getPos() throws IOException {
     return in.getPos();
   }
 
   /**
-   * Returns the percentage of progress in processing the file.  This will be
+   * Returns the percentage of progress in processing the file. This will be
    * represented as a float from 0 to 1 with 1 being 100% completed.
    * 
    * @return The percentage of progress as a float from 0 to 1.
    */
-  public float getProgress()
-    throws IOException {
-	  
+  public float getProgress() throws IOException {
+
     // if we haven't even started
     if (splitEnd == splitStart) {
       return 0.0f;
+    } else {
+      // the progress is current pos - where we started / length of the split
+      return Math.min(1.0f, (getPos() - splitStart) / (float) splitLen);
     }
-    else {
-      // the progress is current pos - where we started  / length of the split
-      return Math.min(1.0f, (getPos() - splitStart) / (float)splitLen);
-    }
   }
 
   /**
-   * <p>Returns true if the next record in the split is read into the key and 
-   * value pair.  The key will be the arc record header and the values will be
-   * the raw content bytes of the arc record.</p>
+   * <p>
+   * Returns true if the next record in the split is read into the key and value
+   * pair. The key will be the arc record header and the values will be the raw
+   * content bytes of the arc record.
+   * </p>
    * 
-   * @param key The record key
-   * @param value The record value
+   * @param key
+   *          The record key
+   * @param value
+   *          The record value
    * 
    * @return True if the next record is read.
    * 
-   * @throws IOException If an error occurs while reading the record value.
+   * @throws IOException
+   *           If an error occurs while reading the record value.
    */
-  public boolean next(Text key, BytesWritable value)
-    throws IOException {
+  public boolean next(Text key, BytesWritable value) throws IOException {
 
     try {
-      
+
       // get the starting position on the input stream
       long startRead = in.getPos();
       byte[] magicBuffer = null;
-      
+
       // we need this loop to handle false positives in reading of gzip records
       while (true) {
-        
+
         // while we haven't passed the end of the split
         if (startRead >= splitEnd) {
           return false;
         }
-        
+
         // scanning for the gzip header
         boolean foundStart = false;
         while (!foundStart) {
-          
+
           // start at the current file position and scan for 1K at time, break
           // if there is no more to read
           startRead = in.getPos();
@@ -202,13 +214,13 @@
           if (read < 0) {
             break;
           }
-          
-          // scan the byte array for the gzip header magic number.  This happens
+
+          // scan the byte array for the gzip header magic number. This happens
           // byte by byte
           for (int i = 0; i < read - 1; i++) {
             byte[] testMagic = new byte[2];
-            System.arraycopy(magicBuffer, i, testMagic, 0, 2);            
-            if (isMagic(testMagic)) {              
+            System.arraycopy(magicBuffer, i, testMagic, 0, 2);
+            if (isMagic(testMagic)) {
               // set the next start to the current gzip header
               startRead += i;
               foundStart = true;
@@ -216,14 +228,14 @@
             }
           }
         }
-        
+
         // seek to the start of the gzip header
         in.seek(startRead);
         ByteArrayOutputStream baos = null;
         int totalRead = 0;
 
         try {
-          
+
           // read 4K of the gzip at a time putting into a byte array
           byte[] buffer = new byte[4096];
           GZIPInputStream zin = new GZIPInputStream(in);
@@ -233,9 +245,8 @@
             baos.write(buffer, 0, gzipRead);
             totalRead += gzipRead;
           }
-        }
-        catch (Exception e) {
-          
+        } catch (Exception e) {
+
           // there are times we get false positives where the gzip header exists
           // but it is not an actual gzip record, so we ignore it and start
           // over seeking
@@ -248,7 +259,7 @@
 
         // change the output stream to a byte array
         byte[] content = baos.toByteArray();
-        
+
         // the first line of the raw content in arc files is the header
         int eol = 0;
         for (int i = 0; i < content.length; i++) {
@@ -257,34 +268,33 @@
             break;
           }
         }
-        
+
         // create the header and the raw content minus the header
         String header = new String(content, 0, eol).trim();
         byte[] raw = new byte[(content.length - eol) - 1];
         System.arraycopy(content, eol + 1, raw, 0, raw.length);
-        
+
         // populate key and values with the header and raw content.
-        Text keyText = (Text)key;
+        Text keyText = (Text) key;
         keyText.set(header);
-        BytesWritable valueBytes = (BytesWritable)value;
+        BytesWritable valueBytes = (BytesWritable) value;
         valueBytes.set(raw, 0, raw.length);
 
-        // TODO: It would be best to start at the end of the gzip read but 
-        // the bytes read in gzip don't match raw bytes in the file so we 
-        // overshoot the next header.  With this current method you get
+        // TODO: It would be best to start at the end of the gzip read but
+        // the bytes read in gzip don't match raw bytes in the file so we
+        // overshoot the next header. With this current method you get
         // some false positives but don't miss records.
         if (startRead + 1 < fileLen) {
           in.seek(startRead + 1);
         }
-        
+
         // populated the record, now return
         return true;
       }
+    } catch (Exception e) {
+      LOG.error(StringUtils.stringifyException(e));
     }
-    catch (Exception e) {
-      LOG.equals(StringUtils.stringifyException(e));      
-    }
-    
+
     // couldn't populate the record or there is no next record to read
     return false;
   }
Index: src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java	(working copy)
@@ -62,18 +62,22 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * <p>The <code>ArcSegmentCreator</code> is a replacement for fetcher that will
- * take arc files as input and produce a nutch segment as output.</p>
+ * <p>
+ * The <code>ArcSegmentCreator</code> is a replacement for fetcher that will
+ * take arc files as input and produce a nutch segment as output.
+ * </p>
  * 
- * <p>Arc files are tars of compressed gzips which are produced by both the
- * internet archive project and the grub distributed crawler project.</p>
+ * <p>
+ * Arc files are tars of compressed gzips which are produced by both the
+ * internet archive project and the grub distributed crawler project.
+ * </p>
  * 
  */
-public class ArcSegmentCreator
-  extends Configured
-  implements Tool, Mapper<Text, BytesWritable, Text, NutchWritable> {
+public class ArcSegmentCreator extends Configured implements Tool,
+    Mapper<Text, BytesWritable, Text, NutchWritable> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ArcSegmentCreator.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ArcSegmentCreator.class);
   public static final String URL_VERSION = "arc.url.version";
   private JobConf jobConf;
   private URLFilters urlFilters;
@@ -89,7 +93,9 @@
   }
 
   /**
-   * <p>Constructor that sets the job configuration.</p>
+   * <p>
+   * Constructor that sets the job configuration.
+   * </p>
    * 
    * @param conf
    */
@@ -105,17 +111,19 @@
   public static synchronized String generateSegmentName() {
     try {
       Thread.sleep(1000);
+    } catch (Throwable t) {
     }
-    catch (Throwable t) {
-    }
     return sdf.format(new Date(System.currentTimeMillis()));
   }
 
   /**
-   * <p>Configures the job.  Sets the url filters, scoring filters, url normalizers
-   * and other relevant data.</p>
+   * <p>
+   * Configures the job. Sets the url filters, scoring filters, url normalizers
+   * and other relevant data.
+   * </p>
    * 
-   * @param job The job configuration.
+   * @param job
+   *          The job configuration.
    */
   public void configure(JobConf job) {
 
@@ -133,23 +141,31 @@
   }
 
   /**
-   * <p>Parses the raw content of a single record to create output.  This method
-   * is almost the same as the {@link org.apache.nutch.Fetcher#output} method in
-   * terms of processing and output.  
+   * <p>
+   * Parses the raw content of a single record to create output. This method is
+   * almost the same as the {@link org.apache.nutch.Fetcher#output} method in
+   * terms of processing and output.
    * 
-   * @param output  The job output collector.
-   * @param segmentName The name of the segment to create.
-   * @param key The url of the record.
-   * @param datum The CrawlDatum of the record.
-   * @param content The raw content of the record
-   * @param pstatus The protocol status
-   * @param status The fetch status.
+   * @param output
+   *          The job output collector.
+   * @param segmentName
+   *          The name of the segment to create.
+   * @param key
+   *          The url of the record.
+   * @param datum
+   *          The CrawlDatum of the record.
+   * @param content
+   *          The raw content of the record
+   * @param pstatus
+   *          The protocol status
+   * @param status
+   *          The fetch status.
    * 
    * @return The result of the parse in a ParseStatus object.
    */
-  private ParseStatus output(OutputCollector<Text, NutchWritable> output, String segmentName,
-    Text key, CrawlDatum datum, Content content, ProtocolStatus pstatus,
-    int status) {
+  private ParseStatus output(OutputCollector<Text, NutchWritable> output,
+      String segmentName, Text key, CrawlDatum datum, Content content,
+      ProtocolStatus pstatus, int status) {
 
     // set the fetch status and the fetch time
     datum.setStatus(status);
@@ -165,8 +181,7 @@
       // add score to content metadata so that ParseSegment can pick it up.
       try {
         scfilters.passScoreBeforeParsing(key, datum, content);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
           LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
@@ -177,16 +192,15 @@
 
         // parse the content
         parseResult = this.parseUtil.parse(content);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         LOG.warn("Error parsing: " + key + ": "
-          + StringUtils.stringifyException(e));
+            + StringUtils.stringifyException(e));
       }
 
       // set the content signature
       if (parseResult == null) {
         byte[] signature = SignatureFactory.getSignature(getConf()).calculate(
-          content, new ParseStatus().getEmptyParse(getConf()));
+            content, new ParseStatus().getEmptyParse(getConf()));
         datum.setSignature(signature);
       }
 
@@ -195,7 +209,7 @@
         output.collect(key, new NutchWritable(content));
 
         if (parseResult != null) {
-          for (Entry <Text, Parse> entry : parseResult) {
+          for (Entry<Text, Parse> entry : parseResult) {
             Text url = entry.getKey();
             Parse parse = entry.getValue();
             ParseStatus parseStatus = parse.getData().getStatus();
@@ -205,36 +219,35 @@
               parse = parseStatus.getEmptyParse(getConf());
             }
 
-            // Calculate page signature. 
-            byte[] signature = SignatureFactory.getSignature(getConf()).calculate(
-              content, parse);
+            // Calculate page signature.
+            byte[] signature = SignatureFactory.getSignature(getConf())
+                .calculate(content, parse);
             // Ensure segment name and score are in parseData metadata
-            parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY,
-              segmentName);
-            parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY,
-              StringUtil.toHexString(signature));
+            parse.getData().getContentMeta()
+                .set(Nutch.SEGMENT_NAME_KEY, segmentName);
+            parse.getData().getContentMeta()
+                .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
             // Pass fetch time to content meta
-            parse.getData().getContentMeta().set(Nutch.FETCH_TIME_KEY,
-              Long.toString(datum.getFetchTime()));
+            parse.getData().getContentMeta()
+                .set(Nutch.FETCH_TIME_KEY, Long.toString(datum.getFetchTime()));
             if (url.equals(key))
               datum.setSignature(signature);
             try {
               scfilters.passScoreAfterParsing(url, content, parse);
-            }
-            catch (Exception e) {
+            } catch (Exception e) {
               if (LOG.isWarnEnabled()) {
                 e.printStackTrace(LogUtil.getWarnStream(LOG));
                 LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
               }
             }
             output.collect(url, new NutchWritable(new ParseImpl(new ParseText(
-              parse.getText()), parse.getData(), parse.isCanonical())));
+                parse.getText()), parse.getData(), parse.isCanonical())));
           }
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         if (LOG.isErrorEnabled()) {
-          LOG.error("ArcSegmentCreator caught:" + StringUtils.stringifyException(e));
+          LOG.error("ArcSegmentCreator caught:"
+              + StringUtils.stringifyException(e));
         }
       }
 
@@ -246,42 +259,51 @@
         }
       }
     }
-    
+
     return null;
   }
 
   /**
-   * <p>Logs any error that occurs during conversion.</p>
+   * <p>
+   * Logs any error that occurs during conversion.
+   * </p>
    * 
-   * @param url The url we are parsing.
-   * @param t The error that occured.
+   * @param url
+   *          The url we are parsing.
+   * @param t
+   *          The error that occurred.
    */
   private void logError(Text url, Throwable t) {
     if (LOG.isInfoEnabled()) {
-      LOG.info("Conversion of " + url + " failed with: " + 
-          StringUtils.stringifyException(t));
+      LOG.info("Conversion of " + url + " failed with: "
+          + StringUtils.stringifyException(t));
     }
   }
 
   /**
-   * <p>Runs the Map job to translate an arc record into output for Nutch 
-   * segments.</p>
+   * <p>
+   * Runs the Map job to translate an arc record into output for Nutch segments.
+   * </p>
    * 
-   * @param key The arc record header.
-   * @param bytes The arc record raw content bytes.
-   * @param output The output collecter.
-   * @param reporter The progress reporter.
+   * @param key
+   *          The arc record header.
+   * @param bytes
+   *          The arc record raw content bytes.
+   * @param output
+   *          The output collector.
+   * @param reporter
+   *          The progress reporter.
    */
   public void map(Text key, BytesWritable bytes,
-    OutputCollector<Text, NutchWritable> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
 
     String[] headers = key.toString().split("\\s+");
     String urlStr = headers[0];
     String version = headers[2];
     String contentType = headers[3];
-    
-    // arcs start with a file description.  for now we ignore this as it is not
+
+    // arcs start with a file description. for now we ignore this as it is not
     // a content record
     if (urlStr.startsWith("filedesc://")) {
       LOG.info("Ignoring file header: " + urlStr);
@@ -289,18 +311,17 @@
     }
     LOG.info("Processing: " + urlStr);
 
-    // get the raw  bytes from the arc file, create a new crawldatum
+    // get the raw bytes from the arc file, create a new crawldatum
     Text url = new Text();
     CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, interval,
-      1.0f);
+        1.0f);
     String segmentName = getConf().get(Nutch.SEGMENT_NAME_KEY);
 
     // normalize and filter the urls
     try {
       urlStr = normalizers.normalize(urlStr, URLNormalizers.SCOPE_FETCHER);
       urlStr = urlFilters.filter(urlStr); // filter the url
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("Skipping " + url + ":" + e);
       }
@@ -315,37 +336,41 @@
 
         // set the protocol status to success and the crawl status to success
         // create the content from the normalized url and the raw bytes from
-        // the arc file,  TODO: currently this doesn't handle text of errors
+        // the arc file, TODO: currently this doesn't handle text of error
         // pages (i.e. 404, etc.). We assume we won't get those.
         ProtocolStatus status = ProtocolStatus.STATUS_SUCCESS;
         Content content = new Content(urlStr, urlStr, bytes.get(), contentType,
-          new Metadata(), getConf());
-        
+            new Metadata(), getConf());
+
         // set the url version into the metadata
         content.getMetadata().set(URL_VERSION, version);
         ParseStatus pstatus = null;
         pstatus = output(output, segmentName, url, datum, content, status,
-          CrawlDatum.STATUS_FETCH_SUCCESS);
+            CrawlDatum.STATUS_FETCH_SUCCESS);
         reporter.progress();
-      }
-      catch (Throwable t) { // unexpected exception
+      } catch (Throwable t) { // unexpected exception
         logError(url, t);
         output(output, segmentName, url, datum, null, null,
-          CrawlDatum.STATUS_FETCH_RETRY);
+            CrawlDatum.STATUS_FETCH_RETRY);
       }
     }
   }
 
   /**
-   * <p>Creates the arc files to segments job.</p>
+   * <p>
+   * Creates the arc files to segments job.
+   * </p>
    * 
-   * @param arcFiles The path to the directory holding the arc files
-   * @param segmentsOutDir The output directory for writing the segments
+   * @param arcFiles
+   *          The path to the directory holding the arc files
+   * @param segmentsOutDir
+   *          The output directory for writing the segments
    * 
-   * @throws IOException If an IO error occurs while running the job.
+   * @throws IOException
+   *           If an IO error occurs while running the job.
    */
   public void createSegments(Path arcFiles, Path segmentsOutDir)
-    throws IOException {
+      throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -369,17 +394,17 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("ArcSegmentCreator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ArcSegmentCreator: finished at " + sdf.format(end)
+        + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String args[])
-    throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new ArcSegmentCreator(), args);
+  public static void main(String args[]) throws Exception {
+    int res = ToolRunner.run(NutchConfiguration.create(),
+        new ArcSegmentCreator(), args);
     System.exit(res);
   }
 
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     String usage = "Usage: ArcSegmentCreator <arcFiles> <segmentsOutDir>";
 
@@ -396,8 +421,7 @@
       // create the segments from the arc files
       createSegments(arcFiles, segmentsOutDir);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ArcSegmentCreator: " + StringUtils.stringifyException(e));
       return -1;
     }
Index: src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
===================================================================
--- src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/arc/ArcInputFormat.java	(working copy)
@@ -30,21 +30,22 @@
 /**
  * A input format the reads arc files.
  */
-public class ArcInputFormat
-  extends FileInputFormat<Text, BytesWritable> {
+public class ArcInputFormat extends FileInputFormat<Text, BytesWritable> {
 
   /**
    * Returns the <code>RecordReader</code> for reading the arc file.
    * 
-   * @param split The InputSplit of the arc file to process.
-   * @param job The job configuration.
-   * @param reporter The progress reporter.
+   * @param split
+   *          The InputSplit of the arc file to process.
+   * @param job
+   *          The job configuration.
+   * @param reporter
+   *          The progress reporter.
    */
   public RecordReader<Text, BytesWritable> getRecordReader(InputSplit split,
-      JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
     reporter.setStatus(split.toString());
-    return new ArcRecordReader(job, (FileSplit)split);
+    return new ArcRecordReader(job, (FileSplit) split);
   }
 
 }
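A minimal sketch, not part of this patch, of how ArcInputFormat and the ArcRecordReader above can be wired into an old-style mapred job. The class name, the paths taken from args, and the use of IdentityMapper are illustrative assumptions rather than Nutch code; ArcSegmentCreator earlier in this patch shows the real consumer.

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.BytesWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.FileOutputFormat;
  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.SequenceFileOutputFormat;
  import org.apache.hadoop.mapred.lib.IdentityMapper;
  import org.apache.nutch.tools.arc.ArcInputFormat;
  import org.apache.nutch.util.NutchConfiguration;

  public class ArcDumpSketch {
    public static void main(String[] args) throws Exception {
      JobConf job = new JobConf(NutchConfiguration.create());
      job.setJobName("arc dump sketch");
      // args[0]: directory of .arc.gz files; ArcRecordReader emits the ARC
      // header line as the key and the raw record bytes as the value.
      FileInputFormat.addInputPath(job, new Path(args[0]));
      job.setInputFormat(ArcInputFormat.class);
      // Pass records straight through to a sequence file; a real job would
      // parse them, as ArcSegmentCreator does.
      job.setMapperClass(IdentityMapper.class);
      job.setNumReduceTasks(0);
      job.setOutputFormat(SequenceFileOutputFormat.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(BytesWritable.class);
      FileOutputFormat.setOutputPath(job, new Path(args[1]));
      JobClient.runJob(job);
    }
  }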
Index: src/java/org/apache/nutch/tools/ResolveUrls.java
===================================================================
--- src/java/org/apache/nutch/tools/ResolveUrls.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/ResolveUrls.java	(working copy)
@@ -59,8 +59,7 @@
   /**
    * A Thread which gets the ip address of a single host by name.
    */
-  private static class ResolverThread
-    extends Thread {
+  private static class ResolverThread extends Thread {
 
     private String url = null;
 
@@ -74,14 +73,13 @@
       String host = URLUtil.getHost(url);
       long start = System.currentTimeMillis();
       try {
-        
-        // get the address by name and if no error is thrown then it 
+
+        // get the address by name and if no error is thrown then it
         // is resolved successfully
         InetAddress ia = InetAddress.getByName(host);
         LOG.info("Resolved: " + host);
         numResolved.incrementAndGet();
-      }
-      catch (Exception uhe) {
+      } catch (Exception uhe) {
         LOG.info("Error Resolving: " + host);
         numErrored.incrementAndGet();
       }
@@ -93,8 +91,8 @@
   }
 
   /**
-   * Creates a thread pool for resolving urls.  Reads in the url file on the
-   * local filesystem.  For each url it attempts to resolve it keeping a total
+   * Creates a thread pool for resolving urls. Reads in the url file on the
+   * local filesystem. For each url it attempts to resolve it keeping a total
    * account of the number resolved, errored, and the amount of time.
    */
   public void resolveUrls() {
@@ -103,13 +101,14 @@
 
       // create a thread pool with a fixed number of threads
       pool = Executors.newFixedThreadPool(numThreads);
-      
-      // read in the urls file and loop through each line, one url per line
+
+      // read in the urls file and loop through each line, one url per
+      // line
       BufferedReader buffRead = new BufferedReader(new FileReader(new File(
-        urlsFile)));
+          urlsFile)));
       String urlStr = null;
       while ((urlStr = buffRead.readLine()) != null) {
-        
+
         // spin up a resolver thread per url
         LOG.info("Starting: " + urlStr);
         pool.execute(new ResolverThread(urlStr));
@@ -119,9 +118,8 @@
       // the thread pool to give urls time to finish resolving
       buffRead.close();
       pool.awaitTermination(60, TimeUnit.SECONDS);
-    }
-    catch (Exception e) {
-      
+    } catch (Exception e) {
+
       // on error shutdown the thread pool immediately
       pool.shutdownNow();
       LOG.info(StringUtils.stringifyException(e));
@@ -129,15 +127,16 @@
 
     // shutdown the thread pool and log totals
     pool.shutdown();
-    LOG.info("Total: " + numTotal.get() + ", Resovled: "
-      + numResolved.get() + ", Errored: " + numErrored.get()
-      + ", Average Time: " + totalTime.get() / numTotal.get());
+    LOG.info("Total: " + numTotal.get() + ", Resovled: " + numResolved.get()
+        + ", Errored: " + numErrored.get() + ", Average Time: "
+        + totalTime.get() / numTotal.get());
   }
 
   /**
    * Create a new ResolveUrls with a file from the local file system.
-   *
-   * @param urlsFile The local urls file, one url per line.
+   * 
+   * @param urlsFile
+   *          The local urls file, one url per line.
    */
   public ResolveUrls(String urlsFile) {
     this(urlsFile, 100);
@@ -145,10 +144,12 @@
 
   /**
    * Create a new ResolveUrls with a urls file and a number of threads for the
-   * Thread pool.  Number of threads is 100 by default.
+   * Thread pool. Number of threads is 100 by default.
    * 
-   * @param urlsFile The local urls file, one url per line.
-   * @param numThreads The number of threads used to resolve urls in parallel.
+   * @param urlsFile
+   *          The local urls file, one url per line.
+   * @param numThreads
+   *          The number of threads used to resolve urls in parallel.
    */
   public ResolveUrls(String urlsFile, int numThreads) {
     this.urlsFile = urlsFile;
@@ -161,12 +162,12 @@
   public static void main(String[] args) {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option urlOpts = OptionBuilder.withArgName("urls").hasArg().withDescription(
-      "the urls file to check").create("urls");
-    Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs().withDescription(
-      "the number of threads to use").create("numThreads");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option urlOpts = OptionBuilder.withArgName("urls").hasArg()
+        .withDescription("the urls file to check").create("urls");
+    Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs()
+        .withDescription("the number of threads to use").create("numThreads");
     options.addOption(helpOpts);
     options.addOption(urlOpts);
     options.addOption(numThreadOpts);
@@ -191,8 +192,7 @@
       }
       ResolveUrls resolve = new ResolveUrls(urls, numThreads);
       resolve.resolveUrls();
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ResolveUrls: " + StringUtils.stringifyException(e));
     }
   }
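As a usage note, the constructors above also allow driving the resolver programmatically; a minimal sketch with a placeholder file name:

  // Resolve every host in urls.txt (one URL per line) using 50 threads.
  new ResolveUrls("urls.txt", 50).resolveUrls();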
Index: src/java/org/apache/nutch/tools/DmozParser.java
===================================================================
--- src/java/org/apache/nutch/tools/DmozParser.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/DmozParser.java	(working copy)
@@ -36,16 +36,15 @@
 import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /** Utility that converts DMOZ RDF into a flat file of URLs to be injected. */
 public class DmozParser {
   public static final Logger LOG = LoggerFactory.getLogger(DmozParser.class);
-  
-    long pages = 0;
 
+  long pages = 0;
+
   /**
-   * This filter fixes characters that might offend our parser.
-   * This lets us be tolerant of errors that might appear in the input XML.
+   * This filter fixes characters that might offend our parser. This lets us be
+   * tolerant of errors that might appear in the input XML.
    */
   private static class XMLCharFilter extends FilterReader {
     private boolean lastBad = false;
@@ -57,9 +56,9 @@
     public int read() throws IOException {
       int c = in.read();
       int value = c;
-      if (c != -1 && !(XMLChar.isValid(c)))     // fix invalid characters
+      if (c != -1 && !(XMLChar.isValid(c))) // fix invalid characters
         value = 'X';
-      else if (lastBad && c == '<') {           // fix mis-matched brackets
+      else if (lastBad && c == '<') { // fix mis-matched brackets
         in.mark(1);
         if (in.read() != '/')
           value = 'X';
@@ -70,37 +69,35 @@
       return value;
     }
 
-    public int read(char[] cbuf, int off, int len)
-      throws IOException {
+    public int read(char[] cbuf, int off, int len) throws IOException {
       int n = in.read(cbuf, off, len);
       if (n != -1) {
         for (int i = 0; i < n; i++) {
-          char c = cbuf[off+i];
+          char c = cbuf[off + i];
           char value = c;
-          if (!(XMLChar.isValid(c)))            // fix invalid characters
+          if (!(XMLChar.isValid(c))) // fix invalid characters
             value = 'X';
-          else if (lastBad && c == '<') {       // fix mis-matched brackets
-            if (i != n-1 && cbuf[off+i+1] != '/')
+          else if (lastBad && c == '<') { // fix mis-matched brackets
+            if (i != n - 1 && cbuf[off + i + 1] != '/')
               value = 'X';
           }
           lastBad = (c == 65533);
-          cbuf[off+i] = value;
+          cbuf[off + i] = value;
         }
       }
       return n;
     }
   }
 
-
   /**
-   * The RDFProcessor receives tag messages during a parse
-   * of RDF XML data.  We build whatever structures we need
-   * from these messages.
+   * The RDFProcessor receives tag messages during a parse of RDF XML data. We
+   * build whatever structures we need from these messages.
    */
   private class RDFProcessor extends DefaultHandler {
     String curURL = null, curSection = null;
-    boolean titlePending = false, descPending = false, insideAdultSection = false;
-    Pattern topicPattern = null; 
+    boolean titlePending = false, descPending = false,
+        insideAdultSection = false;
+    Pattern topicPattern = null;
     StringBuffer title = new StringBuffer(), desc = new StringBuffer();
     XMLReader reader;
     int subsetDenom;
@@ -109,10 +106,12 @@
     Locator location;
 
     /**
-     * Pass in an XMLReader, plus a flag as to whether we 
-     * should include adult material.
+     * Pass in an XMLReader, plus a flag as to whether we should include adult
+     * material.
      */
-    public RDFProcessor(XMLReader reader, int subsetDenom, boolean includeAdult, int skew, Pattern topicPattern) throws IOException {
+    public RDFProcessor(XMLReader reader, int subsetDenom,
+        boolean includeAdult, int skew, Pattern topicPattern)
+        throws IOException {
       this.reader = reader;
       this.subsetDenom = subsetDenom;
       this.includeAdult = includeAdult;
@@ -128,20 +127,21 @@
     /**
      * Start of an XML elt
      */
-    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
+    public void startElement(String namespaceURI, String localName,
+        String qName, Attributes atts) throws SAXException {
       if ("Topic".equals(qName)) {
         curSection = atts.getValue("r:id");
       } else if ("ExternalPage".equals(qName)) {
         // Porn filter
-        if ((! includeAdult) && curSection.startsWith("Top/Adult")) {
+        if ((!includeAdult) && curSection.startsWith("Top/Adult")) {
           return;
         }
-          
+
         if (topicPattern != null && !topicPattern.matcher(curSection).matches()) {
           return;
         }
 
-        // Subset denominator filter.  
+        // Subset denominator filter.
         // Only emit with a chance of 1/denominator.
         String url = atts.getValue("about");
         int hashValue = MD5Hash.digest(url).hashCode();
@@ -174,18 +174,18 @@
      * Termination of XML elt
      */
     public void endElement(String namespaceURI, String localName, String qName)
-      throws SAXException {
+        throws SAXException {
       if (curURL != null) {
         if ("ExternalPage".equals(qName)) {
           //
-          // Inc the number of pages, insert the page, and 
+          // Inc the number of pages, insert the page, and
           // possibly print status.
           //
-          System.out.println(curURL); 
+          System.out.println(curURL);
           pages++;
 
           //
-          // Clear out the link text.  This is what
+          // Clear out the link text. This is what
           // you would use for adding to the linkdb.
           //
           if (title.length() > 0) {
@@ -220,15 +220,13 @@
     }
 
     /**
-     * From time to time the Parser will set the "current location"
-     * by calling this function.  It's useful for emitting locations
-     * for error messages.
+     * From time to time the Parser will set the "current location" by calling
+     * this function. It's useful for emitting locations for error messages.
      */
     public void setDocumentLocator(Locator locator) {
       location = locator;
     }
 
-
     //
     // Interface ErrorHandler
     //
@@ -249,12 +247,12 @@
     public void errorError(SAXParseException spe) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Fatal err: " + spe.toString() + ": " + spe.getMessage());
-        LOG.error("Last known line is " + location.getLineNumber() +
-                  ", column " + location.getColumnNumber());
+        LOG.error("Last known line is " + location.getLineNumber()
+            + ", column " + location.getColumnNumber());
         spe.printStackTrace(LogUtil.getErrorStream(LOG));
       }
     }
-        
+
     /**
      * Emit exception warning message
      */
@@ -267,34 +265,33 @@
   }
 
   /**
-   * Iterate through all the items in this structured DMOZ file.
-   * Add each URL to the web db.
+   * Iterate through all the items in this structured DMOZ file. Add each URL to
+   * the web db.
    */
   public void parseDmozFile(File dmozFile, int subsetDenom,
-                            boolean includeAdult,
-                            int skew,
-                            Pattern topicPattern)
+      boolean includeAdult, int skew, Pattern topicPattern)
 
-    throws IOException, SAXException, ParserConfigurationException {
+  throws IOException, SAXException, ParserConfigurationException {
 
     SAXParserFactory parserFactory = SAXParserFactory.newInstance();
     SAXParser parser = parserFactory.newSAXParser();
     XMLReader reader = parser.getXMLReader();
 
     // Create our own processor to receive SAX events
-    RDFProcessor rp =
-      new RDFProcessor(reader, subsetDenom, includeAdult,
-                       skew, topicPattern);
+    RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew,
+        topicPattern);
     reader.setContentHandler(rp);
     reader.setErrorHandler(rp);
     LOG.info("skew = " + rp.hashSkew);
 
     //
-    // Open filtered text stream.  The TextFilter makes sure that
+    // Open filtered text stream. The XMLCharFilter makes sure that
     // only appropriate XML-approved Text characters are received.
     // Any non-conforming characters are silently skipped.
     //
-    XMLCharFilter in = new XMLCharFilter(new BufferedReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(dmozFile)), "UTF-8")));
+    XMLCharFilter in = new XMLCharFilter(new BufferedReader(
+        new InputStreamReader(new BufferedInputStream(new FileInputStream(
+            dmozFile)), "UTF-8")));
     try {
       InputSource is = new InputSource(in);
       reader.parse(is);
@@ -309,18 +306,17 @@
     }
   }
 
-  private static void addTopicsFromFile(String topicFile,
-                                        Vector<String> topics)
-  throws IOException {
+  private static void addTopicsFromFile(String topicFile, Vector<String> topics)
+      throws IOException {
     BufferedReader in = null;
     try {
-      in = new BufferedReader(new InputStreamReader(new FileInputStream(topicFile), "UTF-8"));
+      in = new BufferedReader(new InputStreamReader(new FileInputStream(
+          topicFile), "UTF-8"));
       String line = null;
       while ((line = in.readLine()) != null) {
         topics.addElement(new String(line));
       }
-    } 
-    catch (Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error(e.toString());
         e.printStackTrace(LogUtil.getErrorStream(LOG));
@@ -330,18 +326,19 @@
       in.close();
     }
   }
-    
+
   /**
-   * Command-line access.  User may add URLs via a flat text file
-   * or the structured DMOZ file.  By default, we ignore Adult
-   * material (as categorized by DMOZ).
+   * Command-line access. User may add URLs via a flat text file or the
+   * structured DMOZ file. By default, we ignore Adult material (as categorized
+   * by DMOZ).
    */
   public static void main(String argv[]) throws Exception {
     if (argv.length < 1) {
-      System.err.println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
+      System.err
+          .println("Usage: DmozParser <dmoz_file> [-subset <subsetDenominator>] [-includeAdultMaterial] [-skew skew] [-topicFile <topic list file>] [-topic <topic> [-topic <topic> [...]]]");
       return;
     }
-    
+
     //
     // Parse the command line, figure out what kind of
     // URL file we need to load
@@ -350,9 +347,9 @@
     int skew = 0;
     String dmozFile = argv[0];
     boolean includeAdult = false;
-    Pattern topicPattern = null; 
+    Pattern topicPattern = null;
     Vector<String> topics = new Vector<String>();
-    
+
     Configuration conf = NutchConfiguration.create();
     FileSystem fs = FileSystem.get(conf);
     try {
@@ -360,16 +357,16 @@
         if ("-includeAdultMaterial".equals(argv[i])) {
           includeAdult = true;
         } else if ("-subset".equals(argv[i])) {
-          subsetDenom = Integer.parseInt(argv[i+1]);
+          subsetDenom = Integer.parseInt(argv[i + 1]);
           i++;
         } else if ("-topic".equals(argv[i])) {
-          topics.addElement(argv[i+1]); 
+          topics.addElement(argv[i + 1]);
           i++;
         } else if ("-topicFile".equals(argv[i])) {
-          addTopicsFromFile(argv[i+1], topics);
+          addTopicsFromFile(argv[i + 1], topics);
           i++;
         } else if ("-skew".equals(argv[i])) {
-          skew = Integer.parseInt(argv[i+1]);
+          skew = Integer.parseInt(argv[i + 1]);
           i++;
         }
       }
@@ -377,21 +374,21 @@
       DmozParser parser = new DmozParser();
 
       if (!topics.isEmpty()) {
-        String regExp = new String("^("); 
+        String regExp = new String("^(");
         int j = 0;
-        for ( ; j < topics.size() - 1; ++j) {
+        for (; j < topics.size() - 1; ++j) {
           regExp = regExp.concat(topics.get(j));
           regExp = regExp.concat("|");
         }
         regExp = regExp.concat(topics.get(j));
-        regExp = regExp.concat(").*"); 
+        regExp = regExp.concat(").*");
         LOG.info("Topic selection pattern = " + regExp);
-        topicPattern = Pattern.compile(regExp); 
+        topicPattern = Pattern.compile(regExp);
       }
 
-      parser.parseDmozFile(new File(dmozFile), subsetDenom,
-                           includeAdult, skew, topicPattern);
-      
+      parser.parseDmozFile(new File(dmozFile), subsetDenom, includeAdult, skew,
+          topicPattern);
+
     } finally {
       fs.close();
     }
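As a worked example of the topic filter above: running DmozParser with -topic Top/Arts -topic Top/Science makes the loop build the selection pattern ^(Top/Arts|Top/Science).*, so only ExternalPage entries whose enclosing Topic r:id starts with one of those categories are emitted.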
Index: src/java/org/apache/nutch/tools/CrawlDBScanner.java
===================================================================
--- src/java/org/apache/nutch/tools/CrawlDBScanner.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/CrawlDBScanner.java	(working copy)
@@ -57,17 +57,21 @@
  */
 
 public class CrawlDBScanner extends Configured implements Tool,
-    Mapper<Text,CrawlDatum,Text,CrawlDatum>, Reducer<Text,CrawlDatum,Text,CrawlDatum> {
+    Mapper<Text, CrawlDatum, Text, CrawlDatum>,
+    Reducer<Text, CrawlDatum, Text, CrawlDatum> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(CrawlDBScanner.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDBScanner.class);
 
-  public CrawlDBScanner() {}
+  public CrawlDBScanner() {
+  }
 
   public CrawlDBScanner(Configuration conf) {
     setConf(conf);
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   private String regex = null;
   private String status = null;
@@ -78,11 +82,14 @@
   }
 
   public void map(Text url, CrawlDatum crawlDatum,
-      OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     // check status
     if (status != null
-        && !status.equalsIgnoreCase(CrawlDatum.getStatusName(crawlDatum.getStatus()))) return;
+        && !status.equalsIgnoreCase(CrawlDatum.getStatusName(crawlDatum
+            .getStatus())))
+      return;
 
     // if URL matched regexp dump it
     if (url.toString().matches(regex)) {
@@ -91,7 +98,8 @@
   }
 
   public void reduce(Text key, Iterator<CrawlDatum> values,
-      OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
     while (values.hasNext()) {
       CrawlDatum val = values.next();
       output.collect(key, val);
@@ -110,7 +118,8 @@
     job.setJobName("Scan : " + crawlDb + " for URLS matching : " + regex);
 
     job.set("CrawlDBScanner.regex", regex);
-    if (status != null) job.set("CrawlDBScanner.status", status);
+    if (status != null)
+      job.set("CrawlDBScanner.status", status);
 
     FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
     job.setInputFormat(SequenceFileInputFormat.class);
@@ -147,11 +156,13 @@
     }
 
     long end = System.currentTimeMillis();
-    LOG.info("CrawlDb scanner: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("CrawlDb scanner: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDBScanner(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDBScanner(),
+        args);
     System.exit(res);
   }
 
Index: src/java/org/apache/nutch/tools/FreeGenerator.java
===================================================================
--- src/java/org/apache/nutch/tools/FreeGenerator.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/FreeGenerator.java	(working copy)
@@ -54,21 +54,22 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This tool generates fetchlists (segments to be fetched) from plain text
- * files containing one URL per line. It's useful when arbitrary URL-s need to
- * be fetched without adding them first to the CrawlDb, or during testing.
+ * This tool generates fetchlists (segments to be fetched) from plain text files
+ * containing one URL per line. It's useful when arbitrary URL-s need to be
+ * fetched without adding them first to the CrawlDb, or during testing.
  * 
  * @author Andrzej Bialecki
  */
 public class FreeGenerator extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(FreeGenerator.class);
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(FreeGenerator.class);
+
   private static final String FILTER_KEY = "free.generator.filter";
   private static final String NORMALIZE_KEY = "free.generator.normalize";
 
-  public static class FG extends MapReduceBase
-  implements Mapper<WritableComparable, Text, Text, Generator.SelectorEntry>,
-  Reducer<Text, Generator.SelectorEntry, Text, CrawlDatum> {
+  public static class FG extends MapReduceBase implements
+      Mapper<WritableComparable, Text, Text, Generator.SelectorEntry>,
+      Reducer<Text, Generator.SelectorEntry, Text, CrawlDatum> {
     private URLNormalizers normalizers = null;
     private URLFilters filters = null;
     private ScoringFilters scfilters;
@@ -86,16 +87,18 @@
         normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
       }
     }
-    
+
     Generator.SelectorEntry entry = new Generator.SelectorEntry();
 
-    public void map(WritableComparable key, Text value, OutputCollector<Text,
-        Generator.SelectorEntry> output, Reporter reporter) throws IOException {
+    public void map(WritableComparable key, Text value,
+        OutputCollector<Text, Generator.SelectorEntry> output, Reporter reporter)
+        throws IOException {
       // value is a line of text
       String urlString = value.toString();
       try {
         if (normalizers != null) {
-          urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_INJECT);
+          urlString = normalizers.normalize(urlString,
+              URLNormalizers.SCOPE_INJECT);
         }
         if (urlString != null && filters != null) {
           urlString = filters.filter(urlString);
@@ -105,7 +108,8 @@
           scfilters.injectedScore(url, datum);
         }
       } catch (Exception e) {
-        LOG.warn("Error adding url '" + value.toString() + "', skipping: " + StringUtils.stringifyException(e));
+        LOG.warn("Error adding url '" + value.toString() + "', skipping: "
+            + StringUtils.stringifyException(e));
         return;
       }
       if (urlString == null) {
@@ -120,11 +124,13 @@
     }
 
     public void reduce(Text key, Iterator<Generator.SelectorEntry> values,
-        OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
-      // pick unique urls from values - discard the reduce key due to hash collisions
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
+      // pick unique urls from values - discard the reduce key due to hash
+      // collisions
       HashMap<Text, CrawlDatum> unique = new HashMap<Text, CrawlDatum>();
       while (values.hasNext()) {
-        Generator.SelectorEntry entry = (Generator.SelectorEntry)values.next();
+        Generator.SelectorEntry entry = (Generator.SelectorEntry) values.next();
         unique.put(entry.url, entry.datum);
       }
       // output unique urls
@@ -133,15 +139,20 @@
       }
     }
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
-      System.err.println("\tinputDir\tinput directory containing one or more input files.");
-      System.err.println("\t\tEach text file contains a list of URLs, one URL per line");
-      System.err.println("\tsegmentsDir\toutput directory, where new segment will be created");
+      System.err
+          .println("Usage: FreeGenerator <inputDir> <segmentsDir> [-filter] [-normalize]");
+      System.err
+          .println("\tinputDir\tinput directory containing one or more input files.");
+      System.err
+          .println("\t\tEach text file contains a list of URLs, one URL per line");
+      System.err
+          .println("\tsegmentsDir\toutput directory, where new segment will be created");
       System.err.println("\t-filter\trun current URLFilters on input URLs");
-      System.err.println("\t-normalize\trun current URLNormalizers on input URLs");
+      System.err
+          .println("\t-normalize\trun current URLNormalizers on input URLs");
       return -1;
     }
     boolean filter = false;
@@ -158,7 +169,7 @@
         }
       }
     }
-    
+
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("FreeGenerator: starting at " + sdf.format(start));
@@ -179,8 +190,8 @@
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(CrawlDatum.class);
     job.setOutputKeyComparatorClass(Generator.HashComparator.class);
-    FileOutputFormat.setOutputPath(job, new Path(args[1],
-        new Path(segName, CrawlDatum.GENERATE_DIR_NAME)));
+    FileOutputFormat.setOutputPath(job, new Path(args[1], new Path(segName,
+        CrawlDatum.GENERATE_DIR_NAME)));
     try {
       JobClient.runJob(job);
     } catch (Exception e) {
@@ -188,12 +199,14 @@
       return -1;
     }
     long end = System.currentTimeMillis();
-    LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
     return 0;
   }
 
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new FreeGenerator(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new FreeGenerator(),
+        args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/tools/Benchmark.java
===================================================================
--- src/java/org/apache/nutch/tools/Benchmark.java	(revision 1188252)
+++ src/java/org/apache/nutch/tools/Benchmark.java	(working copy)
@@ -52,13 +52,14 @@
     int res = ToolRunner.run(conf, new Benchmark(), args);
     System.exit(res);
   }
-  
+
   private static String getDate() {
-    return new SimpleDateFormat("yyyyMMddHHmmss").format
-      (new Date(System.currentTimeMillis()));
+    return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date(System
+        .currentTimeMillis()));
   }
- 
-  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
+
+  private void createSeeds(FileSystem fs, Path seedsDir, int count)
+      throws Exception {
     OutputStream os = fs.create(new Path(seedsDir, "seeds"));
     for (int i = 0; i < count; i++) {
       String url = "http://www.test-" + i + ".com/\r\n";
@@ -67,9 +68,9 @@
     os.flush();
     os.close();
   }
-  
+
   public static final class BenchmarkResults {
-    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>();
+    Map<String, Map<String, Long>> timings = new HashMap<String, Map<String, Long>>();
     List<String> runs = new ArrayList<String>();
     List<String> stages = new ArrayList<String>();
     int seeds, depth, threads;
@@ -77,7 +78,7 @@
     long topN;
     long elapsed;
     String plugins;
-    
+
     public void addTiming(String stage, String run, long timing) {
       if (!runs.contains(run)) {
         runs.add(run);
@@ -85,14 +86,14 @@
       if (!stages.contains(stage)) {
         stages.add(stage);
       }
-      Map<String,Long> t = timings.get(stage);
+      Map<String, Long> t = timings.get(stage);
       if (t == null) {
-        t = new HashMap<String,Long>();
+        t = new HashMap<String, Long>();
         timings.put(stage, t);
       }
       t.put(run, timing);
     }
-    
+
     public String toString() {
       StringBuilder sb = new StringBuilder();
       sb.append("* Plugins:\t" + plugins + "\n");
@@ -103,8 +104,9 @@
       sb.append("* Delete:\t" + delete + "\n");
       sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
       for (String stage : stages) {
-        Map<String,Long> timing = timings.get(stage);
-        if (timing == null) continue;
+        Map<String, Long> timing = timings.get(stage);
+        if (timing == null)
+          continue;
         sb.append("- stage: " + stage + "\n");
         for (String r : runs) {
           Long Time = timing.get(r);
@@ -116,15 +118,16 @@
       }
       return sb.toString();
     }
-    
+
     public List<String> getStages() {
       return stages;
     }
+
     public List<String> getRuns() {
       return runs;
     }
   }
-  
+
   public int run(String[] args) throws Exception {
     String plugins = "protocol-http|parse-tika|scoring-opic|urlfilter-regex|urlnormalizer-pass";
     int seeds = 1;
@@ -132,17 +135,24 @@
     int threads = 10;
     boolean delete = true;
     long topN = Long.MAX_VALUE;
-    
+
     if (args.length == 0) {
-      System.err.println("Usage: Benchmark [-seeds NN] [-depth NN] [-threads NN] [-keep] [-maxPerHost NN] [-plugins <regex>]");
-      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
+      System.err
+          .println("Usage: Benchmark [-seeds NN] [-depth NN] [-threads NN] [-keep] [-maxPerHost NN] [-plugins <regex>]");
+      System.err
+          .println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
       System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
-      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
-      System.err.println("\t-keep\tkeep segment data (default: delete after updatedb)");
+      System.err
+          .println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
+      System.err
+          .println("\t-keep\tkeep segment data (default: delete after updatedb)");
       System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
-      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
-      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
-      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
+      System.err.println("\tNOTE: if not specified, this is reset to: "
+          + plugins);
+      System.err
+          .println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
+      System.err
+          .println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
       return -1;
     }
     int maxPerHost = Integer.MAX_VALUE;
@@ -164,13 +174,15 @@
         return -1;
       }
     }
-    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, delete, plugins);
+    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN,
+        delete, plugins);
     System.out.println(res);
     return 0;
   }
-  
-  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
-        long topN, boolean delete, String plugins) throws Exception {
+
+  public BenchmarkResults benchmark(int seeds, int depth, int threads,
+      int maxPerHost, long topN, boolean delete, String plugins)
+      throws Exception {
     Configuration conf = getConf();
     conf.set("http.proxy.host", "localhost");
     conf.setInt("http.proxy.port", 8181);
@@ -180,11 +192,12 @@
       conf.set("plugin.includes", plugins);
     }
     conf.setInt(Generator.GENERATOR_MAX_COUNT, maxPerHost);
-    conf.set(Generator.GENERATOR_COUNT_MODE, Generator.GENERATOR_COUNT_VALUE_HOST);
-    JobConf job = new NutchJob(getConf());    
+    conf.set(Generator.GENERATOR_COUNT_MODE,
+        Generator.GENERATOR_COUNT_VALUE_HOST);
+    JobConf job = new NutchJob(getConf());
     FileSystem fs = FileSystem.get(job);
-    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
-            "bench-" + System.currentTimeMillis());
+    Path dir = new Path(getConf().get("hadoop.tmp.dir"), "bench-"
+        + System.currentTimeMillis());
     fs.mkdirs(dir);
     Path rootUrlDir = new Path(dir, "seed");
     fs.mkdirs(rootUrlDir);
@@ -194,7 +207,7 @@
       LOG.info("crawl started in: " + dir);
       LOG.info("rootUrlDir = " + rootUrlDir);
       LOG.info("threads = " + threads);
-      LOG.info("depth = " + depth);      
+      LOG.info("depth = " + depth);
     }
     BenchmarkResults res = new BenchmarkResults();
     res.delete = delete;
@@ -213,17 +226,17 @@
     ParseSegment parseSegment = new ParseSegment(getConf());
     CrawlDb crawlDbTool = new CrawlDb(getConf());
     LinkDb linkDbTool = new LinkDb(getConf());
-      
+
     // initialize crawlDb
     long start = System.currentTimeMillis();
     injector.inject(crawlDb, rootUrlDir);
     long delta = System.currentTimeMillis() - start;
     res.addTiming("inject", "0", delta);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new segment
+    for (i = 0; i < depth; i++) { // generate new segment
       start = System.currentTimeMillis();
-      Path[] segs = generator.generate(crawlDb, segments, -1, topN, System
-          .currentTimeMillis());
+      Path[] segs = generator.generate(crawlDb, segments, -1, topN,
+          System.currentTimeMillis());
       delta = System.currentTimeMillis() - start;
       res.addTiming("generate", i + "", delta);
       if (segs == null) {
@@ -231,12 +244,12 @@
         break;
       }
       start = System.currentTimeMillis();
-      fetcher.fetch(segs[0], threads);  // fetch it
+      fetcher.fetch(segs[0], threads); // fetch it
       delta = System.currentTimeMillis() - start;
       res.addTiming("fetch", i + "", delta);
       if (!Fetcher.isParsing(job)) {
         start = System.currentTimeMillis();
-        parseSegment.parse(segs[0]);    // parse it, if needed
+        parseSegment.parse(segs[0]); // parse it, if needed
         delta = System.currentTimeMillis() - start;
         res.addTiming("parse", i + "", delta);
       }
@@ -258,7 +271,9 @@
     if (i == 0) {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     res.elapsed = System.currentTimeMillis() - res.elapsed;
     CrawlDbReader dbreader = new CrawlDbReader();
     dbreader.processStatJob(crawlDb.toString(), conf, false);
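// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original patch: one possible way to
// drive the Benchmark tool whose reformatted usage text appears above. The
// option names (-seeds, -depth, -threads) come from that usage text; the
// driver class name and option values are hypothetical.
// ---------------------------------------------------------------------------
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.tools.Benchmark;
import org.apache.nutch.util.NutchConfiguration;

public class BenchmarkDriver {
  public static void main(String[] args) throws Exception {
    // Equivalent to: bin/nutch org.apache.nutch.tools.Benchmark -seeds 5 -depth 3 -threads 10
    String[] benchArgs = { "-seeds", "5", "-depth", "3", "-threads", "10" };
    int res = ToolRunner.run(NutchConfiguration.create(), new Benchmark(),
        benchArgs);
    System.exit(res);
  }
}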
Index: src/java/org/apache/nutch/protocol/RobotRules.java
===================================================================
--- src/java/org/apache/nutch/protocol/RobotRules.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/RobotRules.java	(working copy)
@@ -35,9 +35,8 @@
   public long getCrawlDelay();
 
   /**
-   * Returns <code>false</code> if the <code>robots.txt</code> file
-   * prohibits us from accessing the given <code>url</code>, or
-   * <code>true</code> otherwise.
+   * Returns <code>false</code> if the <code>robots.txt</code> file prohibits us
+   * from accessing the given <code>url</code>, or <code>true</code> otherwise.
    */
   public boolean isAllowed(URL url);
 
Index: src/java/org/apache/nutch/protocol/Protocol.java
===================================================================
--- src/java/org/apache/nutch/protocol/Protocol.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/Protocol.java	(working copy)
@@ -25,12 +25,11 @@
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.plugin.Pluggable;
 
-
-/** A retriever of url content.  Implemented by protocol extensions. */
+/** A retriever of url content. Implemented by protocol extensions. */
 public interface Protocol extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Protocol.class.getName();
-  
+
   /**
    * Property name. If in the current configuration this property is set to
    * true, protocol implementations should handle "politeness" limits
@@ -49,14 +48,18 @@
    */
   public final static String CHECK_ROBOTS = "protocol.plugin.check.robots";
 
-  /** Returns the {@link Content} for a fetchlist entry.
+  /**
+   * Returns the {@link Content} for a fetchlist entry.
    */
   ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum);
 
   /**
    * Retrieve robot rules applicable for this url.
-   * @param url url to check
-   * @param datum page datum
+   * 
+   * @param url
+   *          url to check
+   * @param datum
+   *          page datum
    * @return robot rules (specific for this url or default), never null
    */
   RobotRules getRobotRules(Text url, CrawlDatum datum);
Index: src/java/org/apache/nutch/protocol/ProtocolOutput.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolOutput.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolOutput.java	(working copy)
@@ -18,8 +18,9 @@
 package org.apache.nutch.protocol;
 
 /**
- * Simple aggregate to pass from protocol plugins both content and
- * protocol status.
+ * Simple aggregate used by protocol plugins to pass back both content and
+ * protocol status.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ProtocolOutput {
@@ -30,12 +31,12 @@
     this.content = content;
     this.status = status;
   }
-  
+
   public ProtocolOutput(Content content) {
     this.content = content;
     this.status = ProtocolStatus.STATUS_SUCCESS;
   }
-  
+
   public Content getContent() {
     return content;
   }
Index: src/java/org/apache/nutch/protocol/ProtocolStatus.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolStatus.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolStatus.java	(working copy)
@@ -30,65 +30,76 @@
  * @author Andrzej Bialecki
  */
 public class ProtocolStatus implements Writable {
-  
+
   private final static byte VERSION = 2;
-  
+
   /** Content was retrieved without errors. */
-  public static final int SUCCESS              = 1;
+  public static final int SUCCESS = 1;
   /** Content was not retrieved. Any further errors may be indicated in args. */
-  public static final int FAILED               = 2;
-  
-  /** This protocol was not found.  Application may attempt to retry later. */
-  public static final int PROTO_NOT_FOUND      = 10;
+  public static final int FAILED = 2;
+
+  /** This protocol was not found. Application may attempt to retry later. */
+  public static final int PROTO_NOT_FOUND = 10;
   /** Resource is gone. */
-  public static final int GONE                 = 11;
+  public static final int GONE = 11;
   /** Resource has moved permanently. New url should be found in args. */
-  public static final int MOVED                = 12;
+  public static final int MOVED = 12;
   /** Resource has moved temporarily. New url should be found in args. */
-  public static final int TEMP_MOVED           = 13;
+  public static final int TEMP_MOVED = 13;
   /** Resource was not found. */
-  public static final int NOTFOUND             = 14;
+  public static final int NOTFOUND = 14;
   /** Temporary failure. Application may retry immediately. */
-  public static final int RETRY                = 15;
-  /** Unspecified exception occured. Further information may be provided in args. */
-  public static final int EXCEPTION            = 16;
+  public static final int RETRY = 15;
+  /**
+   * Unspecified exception occurred. Further information may be provided in args.
+   */
+  public static final int EXCEPTION = 16;
   /** Access denied - authorization required, but missing/incorrect. */
-  public static final int ACCESS_DENIED        = 17;
+  public static final int ACCESS_DENIED = 17;
   /** Access denied by robots.txt rules. */
-  public static final int ROBOTS_DENIED        = 18;
+  public static final int ROBOTS_DENIED = 18;
   /** Too many redirects. */
-  public static final int REDIR_EXCEEDED       = 19;
+  public static final int REDIR_EXCEEDED = 19;
   /** Not fetching. */
-  public static final int NOTFETCHING          = 20;
+  public static final int NOTFETCHING = 20;
   /** Unchanged since the last fetch. */
-  public static final int NOTMODIFIED          = 21;
-  /** Request was refused by protocol plugins, because it would block.
-   * The expected number of milliseconds to wait before retry may be provided
-   * in args. */
-  public static final int WOULDBLOCK           = 22;
+  public static final int NOTMODIFIED = 21;
+  /**
+   * Request was refused by protocol plugins, because it would block. The
+   * expected number of milliseconds to wait before retry may be provided in
+   * args.
+   */
+  public static final int WOULDBLOCK = 22;
   /** Thread was blocked http.max.delays times during fetching. */
-  public static final int BLOCKED              = 23;
-   
+  public static final int BLOCKED = 23;
+
   // Useful static instances for status codes that don't usually require any
   // additional arguments.
-  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(SUCCESS);
+  public static final ProtocolStatus STATUS_SUCCESS = new ProtocolStatus(
+      SUCCESS);
   public static final ProtocolStatus STATUS_FAILED = new ProtocolStatus(FAILED);
   public static final ProtocolStatus STATUS_GONE = new ProtocolStatus(GONE);
-  public static final ProtocolStatus STATUS_NOTFOUND = new ProtocolStatus(NOTFOUND);
+  public static final ProtocolStatus STATUS_NOTFOUND = new ProtocolStatus(
+      NOTFOUND);
   public static final ProtocolStatus STATUS_RETRY = new ProtocolStatus(RETRY);
-  public static final ProtocolStatus STATUS_ROBOTS_DENIED = new ProtocolStatus(ROBOTS_DENIED);
-  public static final ProtocolStatus STATUS_REDIR_EXCEEDED = new ProtocolStatus(REDIR_EXCEEDED);
-  public static final ProtocolStatus STATUS_NOTFETCHING = new ProtocolStatus(NOTFETCHING);
-  public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus(NOTMODIFIED);
-  public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus(WOULDBLOCK);
-  public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus(BLOCKED);
-  
+  public static final ProtocolStatus STATUS_ROBOTS_DENIED = new ProtocolStatus(
+      ROBOTS_DENIED);
+  public static final ProtocolStatus STATUS_REDIR_EXCEEDED = new ProtocolStatus(
+      REDIR_EXCEEDED);
+  public static final ProtocolStatus STATUS_NOTFETCHING = new ProtocolStatus(
+      NOTFETCHING);
+  public static final ProtocolStatus STATUS_NOTMODIFIED = new ProtocolStatus(
+      NOTMODIFIED);
+  public static final ProtocolStatus STATUS_WOULDBLOCK = new ProtocolStatus(
+      WOULDBLOCK);
+  public static final ProtocolStatus STATUS_BLOCKED = new ProtocolStatus(
+      BLOCKED);
+
   private int code;
   private long lastModified;
   private String[] args;
-  
-  private static final HashMap<Integer, String> codeToName =
-    new HashMap<Integer, String>();
+
+  private static final HashMap<Integer, String> codeToName = new HashMap<Integer, String>();
   static {
     codeToName.put(new Integer(SUCCESS), "success");
     codeToName.put(new Integer(FAILED), "failed");
@@ -107,40 +118,41 @@
     codeToName.put(new Integer(WOULDBLOCK), "wouldblock");
     codeToName.put(new Integer(BLOCKED), "blocked");
   }
-  
+
   public ProtocolStatus() {
-    
+
   }
 
   public ProtocolStatus(int code, String[] args) {
     this.code = code;
     this.args = args;
   }
-  
+
   public ProtocolStatus(int code, String[] args, long lastModified) {
     this.code = code;
     this.args = args;
     this.lastModified = lastModified;
   }
-  
+
   public ProtocolStatus(int code) {
     this(code, null);
   }
-  
+
   public ProtocolStatus(int code, long lastModified) {
     this(code, null, lastModified);
   }
-  
+
   public ProtocolStatus(int code, Object message) {
     this(code, message, 0L);
   }
-  
+
   public ProtocolStatus(int code, Object message, long lastModified) {
     this.code = code;
     this.lastModified = lastModified;
-    if (message != null) this.args = new String[]{String.valueOf(message)};
+    if (message != null)
+      this.args = new String[] { String.valueOf(message) };
   }
-  
+
   public ProtocolStatus(Throwable t) {
     this(EXCEPTION, t);
   }
@@ -150,10 +162,10 @@
     res.readFields(in);
     return res;
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     byte version = in.readByte();
-    switch(version) {
+    switch (version) {
     case 1:
       code = in.readByte();
       lastModified = in.readLong();
@@ -168,10 +180,10 @@
       throw new VersionMismatchException(VERSION, version);
     }
   }
-  
+
   public void write(DataOutput out) throws IOException {
     out.writeByte(VERSION);
-    out.writeByte((byte)code);
+    out.writeByte((byte) code);
     out.writeLong(lastModified);
     if (args == null) {
       out.writeInt(-1);
@@ -183,7 +195,7 @@
   public void setArgs(String[] args) {
     this.args = args;
   }
-  
+
   public String[] getArgs() {
     return args;
   }
@@ -195,74 +207,77 @@
   public String getName() {
     return codeToName.get(this.code);
   }
-  
+
   public void setCode(int code) {
     this.code = code;
   }
-  
+
   public boolean isSuccess() {
-    return code == SUCCESS; 
+    return code == SUCCESS;
   }
-  
+
   public boolean isTransientFailure() {
-    return
-        code == ACCESS_DENIED ||
-        code == EXCEPTION ||
-        code == REDIR_EXCEEDED ||
-        code == RETRY ||
-        code == TEMP_MOVED ||
-        code == WOULDBLOCK ||
-        code == PROTO_NOT_FOUND; 
+    return code == ACCESS_DENIED || code == EXCEPTION || code == REDIR_EXCEEDED
+        || code == RETRY || code == TEMP_MOVED || code == WOULDBLOCK
+        || code == PROTO_NOT_FOUND;
   }
-  
+
   public boolean isPermanentFailure() {
-    return
-        code == FAILED ||
-        code == GONE ||
-        code == MOVED ||
-        code == NOTFOUND ||
-        code == ROBOTS_DENIED;
+    return code == FAILED || code == GONE || code == MOVED || code == NOTFOUND
+        || code == ROBOTS_DENIED;
   }
-  
+
   public String getMessage() {
-    if (args != null && args.length > 0) return args[0];
+    if (args != null && args.length > 0)
+      return args[0];
     return null;
   }
-  
+
   public void setMessage(String msg) {
-    if (args != null && args.length > 0) args[0] = msg;
-    else args = new String[] {msg};
+    if (args != null && args.length > 0)
+      args[0] = msg;
+    else
+      args = new String[] { msg };
   }
-  
+
   public long getLastModified() {
     return lastModified;
   }
-  
+
   public void setLastModified(long lastModified) {
     this.lastModified = lastModified;
   }
-  
+
   public boolean equals(Object o) {
-    if (o == null) return false;
-    if (!(o instanceof ProtocolStatus)) return false;
-    ProtocolStatus other = (ProtocolStatus)o;
-    if (this.code != other.code || this.lastModified != other.lastModified) return false;
+    if (o == null)
+      return false;
+    if (!(o instanceof ProtocolStatus))
+      return false;
+    ProtocolStatus other = (ProtocolStatus) o;
+    if (this.code != other.code || this.lastModified != other.lastModified)
+      return false;
     if (this.args == null) {
-      if (other.args == null) return true;
-      else return false;
+      if (other.args == null)
+        return true;
+      else
+        return false;
     } else {
-      if (other.args == null) return false;
-      if (other.args.length != this.args.length) return false;
+      if (other.args == null)
+        return false;
+      if (other.args.length != this.args.length)
+        return false;
       for (int i = 0; i < this.args.length; i++) {
-        if (!this.args[i].equals(other.args[i])) return false;
+        if (!this.args[i].equals(other.args[i]))
+          return false;
       }
     }
     return true;
   }
-  
+
   public String toString() {
     StringBuffer res = new StringBuffer();
-    res.append(codeToName.get(new Integer(code)) + "(" + code + "), lastModified=" + lastModified);
+    res.append(codeToName.get(new Integer(code)) + "(" + code
+        + "), lastModified=" + lastModified);
     if (args != null) {
       if (args.length == 1) {
         res.append(": " + String.valueOf(args[0]));
Index: src/java/org/apache/nutch/protocol/ProtocolFactory.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolFactory.java	(working copy)
@@ -38,7 +38,8 @@
  */
 public class ProtocolFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(ProtocolFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ProtocolFactory.class);
 
   private ExtensionPoint extensionPoint;
 
@@ -59,7 +60,8 @@
    * 
    * @param urlString
    *          Url String
-   * @return The appropriate {@link Protocol} implementation for a given {@link URL}.
+   * @return The appropriate {@link Protocol} implementation for a given
+   *         {@link URL}.
    * @throws ProtocolNotFound
    *           when Protocol can not be found for urlString
    */
@@ -106,13 +108,14 @@
     }
     return null;
   }
-  
-  boolean contains(String what, String where){
-    String parts[]=where.split("[, ]");
-    for(int i=0;i<parts.length;i++) {
-      if(parts[i].equals(what)) return true;
+
+  boolean contains(String what, String where) {
+    String parts[] = where.split("[, ]");
+    for (int i = 0; i < parts.length; i++) {
+      if (parts[i].equals(what))
+        return true;
     }
     return false;
   }
-  
+
 }
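// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original patch: looking up a Protocol
// plugin for a URL via ProtocolFactory, then using the Protocol interface from
// the Protocol.java hunk above. The ProtocolFactory(Configuration) constructor
// and getProtocol(String) method are assumed from the javadoc in this hunk and
// the standard Nutch API; plugin.includes is assumed to contain protocol-http.
// ---------------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolNotFound;
import org.apache.nutch.util.NutchConfiguration;

public class ProtocolLookupDemo {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();
    ProtocolFactory factory = new ProtocolFactory(conf);
    try {
      Protocol http = factory.getProtocol("http://example.com/");
      // http.getProtocolOutput(url, datum) and http.getRobotRules(url, datum)
      // would be called next, per the Protocol interface above.
      System.out.println("Resolved plugin: " + http.getClass().getName());
    } catch (ProtocolNotFound e) {
      System.err.println(e.getMessage()); // "protocol not found for url=..."
    }
  }
}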
Index: src/java/org/apache/nutch/protocol/Content.java
===================================================================
--- src/java/org/apache/nutch/protocol/Content.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/Content.java	(working copy)
@@ -43,7 +43,7 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public final class Content implements Writable{
+public final class Content implements Writable {
 
   public static final String DIR_NAME = "content";
 
@@ -122,11 +122,11 @@
       metadata.readFields(in); // read meta data
       break;
     default:
-      throw new VersionMismatchException((byte)2, oldVersion);
+      throw new VersionMismatchException((byte) 2, oldVersion);
     }
 
   }
-  
+
   public final void readFields(DataInput in) throws IOException {
     metadata.clear();
     int sizeOrVersion = in.readInt();
@@ -144,14 +144,14 @@
         metadata.readFields(in);
         break;
       default:
-        throw new VersionMismatchException((byte)VERSION, (byte)version);
+        throw new VersionMismatchException((byte) VERSION, (byte) version);
       }
     } else { // size
       byte[] compressed = new byte[sizeOrVersion];
       in.readFully(compressed, 0, compressed.length);
       ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
-      DataInput inflater =
-        new DataInputStream(new InflaterInputStream(deflated));
+      DataInput inflater = new DataInputStream(
+          new InflaterInputStream(deflated));
       readFieldsCompressed(inflater);
     }
   }
@@ -185,8 +185,9 @@
     return url;
   }
 
-  /** The base url for relative links contained in the content.
-   * Maybe be different from url if the request redirected.
+  /**
+   * The base url for relative links contained in the content. May be
+   * different from url if the request was redirected.
    */
   public String getBaseUrl() {
     return base;
@@ -201,7 +202,9 @@
     this.content = content;
   }
 
-  /** The media type of the retrieved content.
+  /**
+   * The media type of the retrieved content.
+   * 
    * @see <a href="http://www.iana.org/assignments/media-types/">
    *      http://www.iana.org/assignments/media-types/</a>
    */
@@ -259,13 +262,12 @@
     }
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
     FileSystem fs = FileSystem.get(conf);
-    
+
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
       String segment = remainingArgs[1];
Index: src/java/org/apache/nutch/protocol/ProtocolNotFound.java
===================================================================
--- src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(revision 1188252)
+++ src/java/org/apache/nutch/protocol/ProtocolNotFound.java	(working copy)
@@ -22,7 +22,7 @@
   private String url;
 
   public ProtocolNotFound(String url) {
-    this(url, "protocol not found for url="+url);
+    this(url, "protocol not found for url=" + url);
   }
 
   public ProtocolNotFound(String url, String message) {
@@ -30,5 +30,7 @@
     this.url = url;
   }
 
-  public String getUrl() { return url; }
+  public String getUrl() {
+    return url;
+  }
 }
Index: src/java/org/apache/nutch/segment/SegmentMerger.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentMerger.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentMerger.java	(working copy)
@@ -72,40 +72,47 @@
  * <p>
  * Also, it's possible to slice the resulting segment into chunks of fixed size.
  * </p>
- * <h3>Important Notes</h3>
- * <h4>Which parts are merged?</h4>
- * <p>It doesn't make sense to merge data from segments, which are at different stages
- * of processing (e.g. one unfetched segment, one fetched but not parsed, and
- * one fetched and parsed). Therefore, prior to merging, the tool will determine
- * the lowest common set of input data, and only this data will be merged.
- * This may have some unintended consequences:
- * e.g. if majority of input segments are fetched and parsed, but one of them is unfetched,
- * the tool will fall back to just merging fetchlists, and it will skip all other data
- * from all segments.</p>
+ * <h3>Important Notes</h3> <h4>Which parts are merged?</h4>
+ * <p>
+ * It doesn't make sense to merge data from segments which are at different
+ * stages of processing (e.g. one unfetched segment, one fetched but not parsed,
+ * and one fetched and parsed). Therefore, prior to merging, the tool will
+ * determine the lowest common set of input data, and only this data will be
+ * merged. This may have some unintended consequences: e.g. if the majority of input
+ * segments are fetched and parsed, but one of them is unfetched, the tool will
+ * fall back to just merging fetchlists, and it will skip all other data from
+ * all segments.
+ * </p>
  * <h4>Merging fetchlists</h4>
- * <p>Merging segments, which contain just fetchlists (i.e. prior to fetching)
- * is not recommended, because this tool (unlike the {@link org.apache.nutch.crawl.Generator}
- * doesn't ensure that fetchlist parts for each map task are disjoint.</p>
  * <p>
+ * Merging segments, which contain just fetchlists (i.e. prior to fetching) is
+ * not recommended, because this tool (unlike the
+ * {@link org.apache.nutch.crawl.Generator}) doesn't ensure that fetchlist parts
+ * for each map task are disjoint.
+ * </p>
+ * <p>
  * <h4>Duplicate content</h4>
- * Merging segments removes older content whenever possible (see below). However,
- * this is NOT the same as de-duplication, which in addition removes identical
- * content found at different URL-s. In other words, running DeleteDuplicates is
- * still necessary.
+ * Merging segments removes older content whenever possible (see below).
+ * However, this is NOT the same as de-duplication, which in addition removes
+ * identical content found at different URL-s. In other words, running
+ * DeleteDuplicates is still necessary.
  * </p>
- * <p>For some types of data (especially ParseText) it's not possible to determine
- * which version is really older. Therefore the tool always uses segment names as
- * timestamps, for all types of input data. Segment names are compared in forward lexicographic
- * order (0-9a-zA-Z), and data from segments with "higher" names will prevail.
- * It follows then that it is extremely important that segments be named in an
- * increasing lexicographic order as their creation time increases.</p>
  * <p>
+ * For some types of data (especially ParseText) it's not possible to determine
+ * which version is really older. Therefore the tool always uses segment names
+ * as timestamps, for all types of input data. Segment names are compared in
+ * forward lexicographic order (0-9a-zA-Z), and data from segments with "higher"
+ * names will prevail. It follows then that it is extremely important that
+ * segments be named in an increasing lexicographic order as their creation time
+ * increases.
+ * </p>
+ * <p>
  * <h4>Merging and indexes</h4>
  * Merged segment gets a different name. Since Indexer embeds segment names in
- * indexes, any indexes originally created for the input segments will NOT work with the
- * merged segment. Newly created merged segment(s) need to be indexed afresh.
- * This tool doesn't use existing indexes in any way, so if
- * you plan to merge segments you don't have to index them prior to merging.
+ * indexes, any indexes originally created for the input segments will NOT work
+ * with the merged segment. Newly created merged segment(s) need to be indexed
+ * afresh. This tool doesn't use existing indexes in any way, so if you plan to
+ * merge segments you don't have to index them prior to merging.
  * 
  * 
  * @author Andrzej Bialecki
@@ -113,7 +120,8 @@
 public class SegmentMerger extends Configured implements
     Mapper<Text, MetaWrapper, Text, MetaWrapper>,
     Reducer<Text, MetaWrapper, Text, MetaWrapper> {
-  private static final Logger LOG = LoggerFactory.getLogger(SegmentMerger.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SegmentMerger.class);
 
   private static final String SEGMENT_PART_KEY = "part";
   private static final String SEGMENT_SLICE_KEY = "slice";
@@ -123,20 +131,21 @@
   private SegmentMergeFilters mergeFilters = null;
   private long sliceSize = -1;
   private long curCount = 0;
-  
+
   /**
-   * Wraps inputs in an {@link MetaWrapper}, to permit merging different
-   * types in reduce and use additional metadata.
+   * Wraps inputs in a {@link MetaWrapper}, to permit merging different types
+   * in reduce and use additional metadata.
    */
   public static class ObjectInputFormat extends
-    SequenceFileInputFormat<Text, MetaWrapper> {
-    
+      SequenceFileInputFormat<Text, MetaWrapper> {
+
     @Override
-    public RecordReader<Text, MetaWrapper> getRecordReader(final InputSplit split,
-        final JobConf job, Reporter reporter) throws IOException {
+    public RecordReader<Text, MetaWrapper> getRecordReader(
+        final InputSplit split, final JobConf job, Reporter reporter)
+        throws IOException {
 
       reporter.setStatus(split.toString());
-      
+
       // find part name
       SegmentPart segmentPart;
       final String spString;
@@ -147,10 +156,10 @@
       } catch (IOException e) {
         throw new RuntimeException("Cannot identify segment:", e);
       }
-      
-      SequenceFile.Reader reader =
-        new SequenceFile.Reader(FileSystem.get(job), fSplit.getPath(), job);
-      
+
+      SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job),
+          fSplit.getPath(), job);
+
       final Writable w;
       try {
         w = (Writable) reader.getValueClass().newInstance();
@@ -163,13 +172,14 @@
           // ignore
         }
       }
-      final SequenceFileRecordReader<Text,Writable> splitReader =
-        new SequenceFileRecordReader<Text,Writable>(job, (FileSplit)split);
+      final SequenceFileRecordReader<Text, Writable> splitReader = new SequenceFileRecordReader<Text, Writable>(
+          job, (FileSplit) split);
 
       try {
         return new SequenceFileRecordReader<Text, MetaWrapper>(job, fSplit) {
-          
-          public synchronized boolean next(Text key, MetaWrapper wrapper) throws IOException {
+
+          public synchronized boolean next(Text key, MetaWrapper wrapper)
+              throws IOException {
             LOG.debug("Running OIF.next()");
 
             boolean res = splitReader.next(key, w);
@@ -177,17 +187,17 @@
             wrapper.setMeta(SEGMENT_PART_KEY, spString);
             return res;
           }
-          
+
           @Override
           public synchronized void close() throws IOException {
             splitReader.close();
           }
-          
+
           @Override
           public MetaWrapper createValue() {
             return new MetaWrapper();
           }
-          
+
         };
       } catch (IOException e) {
         throw new RuntimeException("Cannot create RecordReader: ", e);
@@ -195,11 +205,14 @@
     }
   }
 
-  public static class SegmentOutputFormat extends FileOutputFormat<Text, MetaWrapper> {
+  public static class SegmentOutputFormat extends
+      FileOutputFormat<Text, MetaWrapper> {
     private static final String DEFAULT_SLICE = "default";
-    
+
     @Override
-    public RecordWriter<Text, MetaWrapper> getRecordWriter(final FileSystem fs, final JobConf job, final String name, final Progressable progress) throws IOException {
+    public RecordWriter<Text, MetaWrapper> getRecordWriter(final FileSystem fs,
+        final JobConf job, final String name, final Progressable progress)
+        throws IOException {
       return new RecordWriter<Text, MetaWrapper>() {
         MapFile.Writer c_out = null;
         MapFile.Writer f_out = null;
@@ -209,24 +222,26 @@
         SequenceFile.Writer p_out = null;
         HashMap sliceWriters = new HashMap();
         String segmentName = job.get("segment.merger.segmentName");
-        
+
         public void write(Text key, MetaWrapper wrapper) throws IOException {
           // unwrap
           SegmentPart sp = SegmentPart.parse(wrapper.getMeta(SEGMENT_PART_KEY));
-          Writable o = (Writable)wrapper.get();
+          Writable o = (Writable) wrapper.get();
           String slice = wrapper.getMeta(SEGMENT_SLICE_KEY);
           if (o instanceof CrawlDatum) {
             if (sp.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) {
               g_out = ensureSequenceFile(slice, CrawlDatum.GENERATE_DIR_NAME);
               g_out.append(key, o);
             } else if (sp.partName.equals(CrawlDatum.FETCH_DIR_NAME)) {
-              f_out = ensureMapFile(slice, CrawlDatum.FETCH_DIR_NAME, CrawlDatum.class);
+              f_out = ensureMapFile(slice, CrawlDatum.FETCH_DIR_NAME,
+                  CrawlDatum.class);
               f_out.append(key, o);
             } else if (sp.partName.equals(CrawlDatum.PARSE_DIR_NAME)) {
               p_out = ensureSequenceFile(slice, CrawlDatum.PARSE_DIR_NAME);
               p_out.append(key, o);
             } else {
-              throw new IOException("Cannot determine segment part: " + sp.partName);
+              throw new IOException("Cannot determine segment part: "
+                  + sp.partName);
             }
           } else if (o instanceof Content) {
             c_out = ensureMapFile(slice, Content.DIR_NAME, Content.class);
@@ -234,9 +249,11 @@
           } else if (o instanceof ParseData) {
             // update the segment name inside contentMeta - required by Indexer
             if (slice == null) {
-              ((ParseData)o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY, segmentName);
+              ((ParseData) o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY,
+                  segmentName);
             } else {
-              ((ParseData)o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY, segmentName + "-" + slice);
+              ((ParseData) o).getContentMeta().set(Nutch.SEGMENT_NAME_KEY,
+                  segmentName + "-" + slice);
             }
             pd_out = ensureMapFile(slice, ParseData.DIR_NAME, ParseData.class);
             pd_out.append(key, o);
@@ -245,20 +262,26 @@
             pt_out.append(key, o);
           }
         }
-        
+
         // lazily create SequenceFile-s.
-        private SequenceFile.Writer ensureSequenceFile(String slice, String dirName) throws IOException {
-          if (slice == null) slice = DEFAULT_SLICE;
-          SequenceFile.Writer res = (SequenceFile.Writer)sliceWriters.get(slice + dirName);
-          if (res != null) return res;
+        private SequenceFile.Writer ensureSequenceFile(String slice,
+            String dirName) throws IOException {
+          if (slice == null)
+            slice = DEFAULT_SLICE;
+          SequenceFile.Writer res = (SequenceFile.Writer) sliceWriters
+              .get(slice + dirName);
+          if (res != null)
+            return res;
           Path wname;
           Path out = FileOutputFormat.getOutputPath(job);
           if (slice == DEFAULT_SLICE) {
-            wname = new Path(new Path(new Path(out, segmentName), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName), dirName),
+                name);
           } else {
-            wname = new Path(new Path(new Path(out, segmentName + "-" + slice), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName + "-" + slice),
+                dirName), name);
           }
-          res = SequenceFile.createWriter(fs, job, wname, Text.class, 
+          res = SequenceFile.createWriter(fs, job, wname, Text.class,
               CrawlDatum.class,
               SequenceFileOutputFormat.getOutputCompressionType(job), progress);
           sliceWriters.put(slice + dirName, res);
@@ -266,23 +289,30 @@
         }
 
         // lazily create MapFile-s.
-        private MapFile.Writer ensureMapFile(String slice, String dirName, Class<? extends Writable> clazz) throws IOException {
-          if (slice == null) slice = DEFAULT_SLICE;
-          MapFile.Writer res = (MapFile.Writer)sliceWriters.get(slice + dirName);
-          if (res != null) return res;
+        private MapFile.Writer ensureMapFile(String slice, String dirName,
+            Class<? extends Writable> clazz) throws IOException {
+          if (slice == null)
+            slice = DEFAULT_SLICE;
+          MapFile.Writer res = (MapFile.Writer) sliceWriters.get(slice
+              + dirName);
+          if (res != null)
+            return res;
           Path wname;
           Path out = FileOutputFormat.getOutputPath(job);
           if (slice == DEFAULT_SLICE) {
-            wname = new Path(new Path(new Path(out, segmentName), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName), dirName),
+                name);
           } else {
-            wname = new Path(new Path(new Path(out, segmentName + "-" + slice), dirName), name);
+            wname = new Path(new Path(new Path(out, segmentName + "-" + slice),
+                dirName), name);
           }
-          CompressionType compType = 
-              SequenceFileOutputFormat.getOutputCompressionType(job);
+          CompressionType compType = SequenceFileOutputFormat
+              .getOutputCompressionType(job);
           if (clazz.isAssignableFrom(ParseText.class)) {
             compType = CompressionType.RECORD;
           }
-          res = new MapFile.Writer(job, fs, wname.toString(), Text.class, clazz, compType, progress);
+          res = new MapFile.Writer(job, fs, wname.toString(), Text.class,
+              clazz, compType, progress);
           sliceWriters.put(slice + dirName, res);
           return res;
         }
@@ -292,9 +322,9 @@
           while (it.hasNext()) {
             Object o = it.next();
             if (o instanceof SequenceFile.Writer) {
-              ((SequenceFile.Writer)o).close();
+              ((SequenceFile.Writer) o).close();
             } else {
-              ((MapFile.Writer)o).close();
+              ((MapFile.Writer) o).close();
             }
           }
         }
@@ -305,14 +335,15 @@
   public SegmentMerger() {
     super(null);
   }
-  
+
   public SegmentMerger(Configuration conf) {
     super(conf);
   }
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     if (conf.getBoolean("segment.merger.filter", false)) {
       filters = new URLFilters(conf);
       mergeFilters = new SegmentMergeFilters(conf);
@@ -334,15 +365,18 @@
       sliceSize = sliceSize / conf.getNumReduceTasks();
     }
   }
-  
+
   private Text newKey = new Text();
-  
+
   public void map(Text key, MetaWrapper value,
-      OutputCollector<Text, MetaWrapper> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, MetaWrapper> output, Reporter reporter)
+      throws IOException {
     String url = key.toString();
     if (normalizers != null) {
       try {
-        url = normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); // normalize the url
+        url = normalizers.normalize(url, URLNormalizers.SCOPE_DEFAULT); // normalize
+                                                                        // the
+                                                                        // url
       } catch (Exception e) {
         LOG.warn("Skipping " + url + ":" + e.getMessage());
         url = null;
@@ -356,7 +390,7 @@
         url = null;
       }
     }
-    if(url != null) {
+    if (url != null) {
       newKey.set(url);
       output.collect(newKey, value);
     }
@@ -364,12 +398,13 @@
 
   /**
    * NOTE: in selecting the latest version we rely exclusively on the segment
-   * name (not all segment data contain time information). Therefore it is extremely
-   * important that segments be named in an increasing lexicographic order as
-   * their creation time increases.
+   * name (not all segment data contain time information). Therefore it is
+   * extremely important that segments be named in an increasing lexicographic
+   * order as their creation time increases.
    */
   public void reduce(Text key, Iterator<MetaWrapper> values,
-      OutputCollector<Text, MetaWrapper> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, MetaWrapper> output, Reporter reporter)
+      throws IOException {
     CrawlDatum lastG = null;
     CrawlDatum lastF = null;
     CrawlDatum lastSig = null;
@@ -382,18 +417,17 @@
     String lastCname = null;
     String lastPDname = null;
     String lastPTname = null;
-    TreeMap<String, ArrayList<CrawlDatum>> linked =
-      new TreeMap<String, ArrayList<CrawlDatum>>();
+    TreeMap<String, ArrayList<CrawlDatum>> linked = new TreeMap<String, ArrayList<CrawlDatum>>();
     while (values.hasNext()) {
       MetaWrapper wrapper = values.next();
       Object o = wrapper.get();
       String spString = wrapper.getMeta(SEGMENT_PART_KEY);
       if (spString == null) {
-        throw new IOException("Null segment part, key=" + key);        
+        throw new IOException("Null segment part, key=" + key);
       }
       SegmentPart sp = SegmentPart.parse(spString);
       if (o instanceof CrawlDatum) {
-        CrawlDatum val = (CrawlDatum)o;
+        CrawlDatum val = (CrawlDatum) o;
         // check which output dir it belongs to
         if (sp.partName.equals(CrawlDatum.GENERATE_DIR_NAME)) {
           if (lastG == null) {
@@ -443,43 +477,43 @@
         }
       } else if (o instanceof Content) {
         if (lastC == null) {
-          lastC = (Content)o;
+          lastC = (Content) o;
           lastCname = sp.segmentName;
         } else {
           if (lastCname.compareTo(sp.segmentName) < 0) {
-            lastC = (Content)o;
+            lastC = (Content) o;
             lastCname = sp.segmentName;
           }
         }
       } else if (o instanceof ParseData) {
         if (lastPD == null) {
-          lastPD = (ParseData)o;
+          lastPD = (ParseData) o;
           lastPDname = sp.segmentName;
         } else {
           if (lastPDname.compareTo(sp.segmentName) < 0) {
-            lastPD = (ParseData)o;
+            lastPD = (ParseData) o;
             lastPDname = sp.segmentName;
           }
         }
       } else if (o instanceof ParseText) {
         if (lastPT == null) {
-          lastPT = (ParseText)o;
+          lastPT = (ParseText) o;
           lastPTname = sp.segmentName;
         } else {
           if (lastPTname.compareTo(sp.segmentName) < 0) {
-            lastPT = (ParseText)o;
+            lastPT = (ParseText) o;
             lastPTname = sp.segmentName;
           }
         }
       }
     }
-	// perform filtering based on full merge record
-    if (mergeFilters != null && 
-    	 !mergeFilters.filter(key, lastG, lastF, lastSig, lastC, lastPD, lastPT, 
-    			 			   linked.isEmpty() ? null : linked.lastEntry().getValue())){
+    // perform filtering based on full merge record
+    if (mergeFilters != null
+        && !mergeFilters.filter(key, lastG, lastF, lastSig, lastC, lastPD,
+            lastPT, linked.isEmpty() ? null : linked.lastEntry().getValue())) {
       return;
     }
-    	
+
     curCount++;
     String sliceName = null;
     MetaWrapper wrapper = new MetaWrapper();
@@ -545,10 +579,12 @@
     }
   }
 
-  public void merge(Path out, Path[] segs, boolean filter, boolean normalize, long slice) throws Exception {
+  public void merge(Path out, Path[] segs, boolean filter, boolean normalize,
+      long slice) throws Exception {
     String segmentName = Generator.generateSegmentName();
     if (LOG.isInfoEnabled()) {
-      LOG.info("Merging " + segs.length + " segments to " + out + "/" + segmentName);
+      LOG.info("Merging " + segs.length + " segments to " + out + "/"
+          + segmentName);
     }
     JobConf job = new NutchJob(getConf());
     job.setJobName("mergesegs " + out + "/" + segmentName);
@@ -589,17 +625,24 @@
       pt = pt && fs.exists(ptDir);
     }
     StringBuffer sb = new StringBuffer();
-    if (c) sb.append(" " + Content.DIR_NAME);
-    if (g) sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
-    if (f) sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
-    if (p) sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
-    if (pd) sb.append(" " + ParseData.DIR_NAME);
-    if (pt) sb.append(" " + ParseText.DIR_NAME);
+    if (c)
+      sb.append(" " + Content.DIR_NAME);
+    if (g)
+      sb.append(" " + CrawlDatum.GENERATE_DIR_NAME);
+    if (f)
+      sb.append(" " + CrawlDatum.FETCH_DIR_NAME);
+    if (p)
+      sb.append(" " + CrawlDatum.PARSE_DIR_NAME);
+    if (pd)
+      sb.append(" " + ParseData.DIR_NAME);
+    if (pt)
+      sb.append(" " + ParseText.DIR_NAME);
     if (LOG.isInfoEnabled()) {
       LOG.info("SegmentMerger: using segment data from:" + sb.toString());
     }
     for (int i = 0; i < segs.length; i++) {
-      if (segs[i] == null) continue;
+      if (segs[i] == null)
+        continue;
       if (g) {
         Path gDir = new Path(segs[i], CrawlDatum.GENERATE_DIR_NAME);
         FileInputFormat.addInputPath(job, gDir);
@@ -632,9 +675,9 @@
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(MetaWrapper.class);
     job.setOutputFormat(SegmentOutputFormat.class);
-    
+
     setConf(job);
-    
+
     JobClient.runJob(job);
   }
 
@@ -643,12 +686,17 @@
    */
   public static void main(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-slice NNNN]");
-      System.err.println("\toutput_dir\tname of the parent dir for output segment slice(s)");
-      System.err.println("\t-dir segments\tparent dir containing several segments");
+      System.err
+          .println("SegmentMerger output_dir (-dir segments | seg1 seg2 ...) [-filter] [-slice NNNN]");
+      System.err
+          .println("\toutput_dir\tname of the parent dir for output segment slice(s)");
+      System.err
+          .println("\t-dir segments\tparent dir containing several segments");
       System.err.println("\tseg1 seg2 ...\tlist of segment dirs");
-      System.err.println("\t-filter\t\tfilter out URL-s prohibited by current URLFilters");
-      System.err.println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
+      System.err
+          .println("\t-filter\t\tfilter out URL-s prohibited by current URLFilters");
+      System.err
+          .println("\t-slice NNNN\tcreate many output segments, each containing NNNN URLs");
       return;
     }
     Configuration conf = NutchConfiguration.create();
@@ -680,7 +728,8 @@
       return;
     }
     SegmentMerger merger = new SegmentMerger(conf);
-    merger.merge(out, segs.toArray(new Path[segs.size()]), filter, normalize, sliceSize);
+    merger.merge(out, segs.toArray(new Path[segs.size()]), filter, normalize,
+        sliceSize);
   }
 
 }
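// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the original patch: merging two segments
// with the SegmentMerger.merge(out, segs, filter, normalize, slice) signature
// shown in the hunk above. The segment paths are hypothetical; slice = -1
// disables slicing, matching the default sliceSize in setConf.
// ---------------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.nutch.segment.SegmentMerger;
import org.apache.nutch.util.NutchConfiguration;

public class MergeTwoSegments {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    Path out = new Path("crawl/merged_segments");
    Path[] segs = { new Path("crawl/segments/20111024101500"),
        new Path("crawl/segments/20111025101500") };
    SegmentMerger merger = new SegmentMerger(conf);
    merger.merge(out, segs, false /* filter */, false /* normalize */, -1);
  }
}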
Index: src/java/org/apache/nutch/segment/SegmentReader.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentReader.java	(working copy)
@@ -77,7 +77,7 @@
   public static final Logger LOG = LoggerFactory.getLogger(SegmentReader.class);
 
   long recNo = 0L;
-  
+
   private boolean co, fe, ge, pa, pd, pt;
   private FileSystem fs;
 
@@ -86,32 +86,37 @@
     private Text newKey = new Text();
 
     public void map(WritableComparable key, Writable value,
-        OutputCollector<Text, NutchWritable> collector, Reporter reporter) throws IOException {
+        OutputCollector<Text, NutchWritable> collector, Reporter reporter)
+        throws IOException {
       // convert on the fly from old formats with UTF8 keys
       if (key instanceof UTF8) {
         newKey.set(key.toString());
         key = newKey;
       }
-      collector.collect((Text)key, new NutchWritable(value));
+      collector.collect((Text) key, new NutchWritable(value));
     }
-    
+
   }
 
   /** Implements a text output format */
   public static class TextOutputFormat extends
       FileOutputFormat<WritableComparable, Writable> {
     public RecordWriter<WritableComparable, Writable> getRecordWriter(
-        final FileSystem fs, JobConf job,
-        String name, final Progressable progress) throws IOException {
+        final FileSystem fs, JobConf job, String name,
+        final Progressable progress) throws IOException {
 
-      final Path segmentDumpFile = new Path(FileOutputFormat.getOutputPath(job), name);
+      final Path segmentDumpFile = new Path(
+          FileOutputFormat.getOutputPath(job), name);
 
       // Get the old copy out of the way
-      if (fs.exists(segmentDumpFile)) fs.delete(segmentDumpFile, true);
+      if (fs.exists(segmentDumpFile))
+        fs.delete(segmentDumpFile, true);
 
-      final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
+      final PrintStream printStream = new PrintStream(
+          fs.create(segmentDumpFile));
       return new RecordWriter<WritableComparable, Writable>() {
-        public synchronized void write(WritableComparable key, Writable value) throws IOException {
+        public synchronized void write(WritableComparable key, Writable value)
+            throws IOException {
           printStream.println(value);
         }
 
@@ -125,9 +130,9 @@
   public SegmentReader() {
     super(null);
   }
-  
-  public SegmentReader(Configuration conf, boolean co, boolean fe, boolean ge, boolean pa,
-          boolean pd, boolean pt) {
+
+  public SegmentReader(Configuration conf, boolean co, boolean fe, boolean ge,
+      boolean pa, boolean pd, boolean pt) {
     super(conf);
     this.co = co;
     this.fe = fe;
@@ -167,12 +172,12 @@
     job.setBoolean("segment.reader.pt", this.pt);
     return job;
   }
-  
-  public void close() {}
 
+  public void close() {
+  }
+
   public void reduce(Text key, Iterator<NutchWritable> values,
-      OutputCollector<Text, Text> output, Reporter reporter)
-          throws IOException {
+      OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
     StringBuffer dump = new StringBuffer();
 
     dump.append("\nRecno:: ").append(recNo++).append("\n");
@@ -195,7 +200,7 @@
   }
 
   public void dump(Path segment, Path output) throws IOException {
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("SegmentReader: dump segment: " + segment);
     }
@@ -203,20 +208,30 @@
     JobConf job = createJobConf();
     job.setJobName("read " + segment);
 
-    if (ge) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
-    if (fe) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
-    if (pa) FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
-    if (co) FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
-    if (pd) FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
-    if (pt) FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
+    if (ge)
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.GENERATE_DIR_NAME));
+    if (fe)
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.FETCH_DIR_NAME));
+    if (pa)
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.PARSE_DIR_NAME));
+    if (co)
+      FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
+    if (pd)
+      FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
+    if (pt)
+      FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
 
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setMapperClass(InputCompatMapper.class);
     job.setReducerClass(SegmentReader.class);
 
-    Path tempDir = new Path(job.get("hadoop.tmp.dir", "/tmp") + "/segread-" + new java.util.Random().nextInt());
+    Path tempDir = new Path(job.get("hadoop.tmp.dir", "/tmp") + "/segread-"
+        + new java.util.Random().nextInt());
     fs.delete(tempDir, true);
-    
+
     FileOutputFormat.setOutputPath(job, tempDir);
     job.setOutputFormat(TextOutputFormat.class);
     job.setOutputKeyClass(Text.class);
@@ -229,22 +244,25 @@
 
     // remove the old file
     fs.delete(dumpFile, true);
-    FileStatus[] fstats = fs.listStatus(tempDir, HadoopFSUtil.getPassAllFilter());
+    FileStatus[] fstats = fs.listStatus(tempDir,
+        HadoopFSUtil.getPassAllFilter());
     Path[] files = HadoopFSUtil.getPaths(fstats);
 
     PrintWriter writer = null;
     int currentRecordNumber = 0;
     if (files.length > 0) {
-      writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(fs.create(dumpFile))));
+      writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
+          fs.create(dumpFile))));
       try {
         for (int i = 0; i < files.length; i++) {
           Path partFile = (Path) files[i];
           try {
-            currentRecordNumber = append(fs, job, partFile, writer, currentRecordNumber);
+            currentRecordNumber = append(fs, job, partFile, writer,
+                currentRecordNumber);
           } catch (IOException exception) {
             if (LOG.isWarnEnabled()) {
-              LOG.warn("Couldn't copy the content of " + partFile.toString() +
-                       " into " + dumpFile.toString());
+              LOG.warn("Couldn't copy the content of " + partFile.toString()
+                  + " into " + dumpFile.toString());
               LOG.warn(exception.getMessage());
             }
           }
@@ -254,13 +272,16 @@
       }
     }
     fs.delete(tempDir);
-    if (LOG.isInfoEnabled()) { LOG.info("SegmentReader: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("SegmentReader: done");
+    }
   }
 
   /** Appends two files and updates the Recno counter */
-  private int append(FileSystem fs, Configuration conf, Path src, PrintWriter writer, int currentRecordNumber)
-          throws IOException {
-    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(src)));
+  private int append(FileSystem fs, Configuration conf, Path src,
+      PrintWriter writer, int currentRecordNumber) throws IOException {
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        fs.open(src)));
     try {
       String line = reader.readLine();
       while (line != null) {
@@ -277,89 +298,101 @@
   }
 
   private static final String[][] keys = new String[][] {
-          {"co", "Content::\n"},
-          {"ge", "Crawl Generate::\n"},
-          {"fe", "Crawl Fetch::\n"},
-          {"pa", "Crawl Parse::\n"},
-          {"pd", "ParseData::\n"},
-          {"pt", "ParseText::\n"}
-  };
+      { "co", "Content::\n" }, { "ge", "Crawl Generate::\n" },
+      { "fe", "Crawl Fetch::\n" }, { "pa", "Crawl Parse::\n" },
+      { "pd", "ParseData::\n" }, { "pt", "ParseText::\n" } };
 
   public void get(final Path segment, final Text key, Writer writer,
-          final Map<String, List<Writable>> results) throws Exception {
+      final Map<String, List<Writable>> results) throws Exception {
     LOG.info("SegmentReader: get '" + key + "'");
     ArrayList<Thread> threads = new ArrayList<Thread>();
-    if (co) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, Content.DIR_NAME), key);
-          results.put("co", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+    if (co)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                Content.DIR_NAME), key);
+            results.put("co", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (fe) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, CrawlDatum.FETCH_DIR_NAME), key);
-          results.put("fe", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (fe)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                CrawlDatum.FETCH_DIR_NAME), key);
+            results.put("fe", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (ge) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getSeqRecords(new Path(segment, CrawlDatum.GENERATE_DIR_NAME), key);
-          results.put("ge", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (ge)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getSeqRecords(new Path(segment,
+                CrawlDatum.GENERATE_DIR_NAME), key);
+            results.put("ge", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (pa) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getSeqRecords(new Path(segment, CrawlDatum.PARSE_DIR_NAME), key);
-          results.put("pa", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (pa)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getSeqRecords(new Path(segment,
+                CrawlDatum.PARSE_DIR_NAME), key);
+            results.put("pa", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (pd) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, ParseData.DIR_NAME), key);
-          results.put("pd", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (pd)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                ParseData.DIR_NAME), key);
+            results.put("pd", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
-    if (pt) threads.add(new Thread() {
-      public void run() {
-        try {
-          List<Writable> res = getMapRecords(new Path(segment, ParseText.DIR_NAME), key);
-          results.put("pt", res);
-        } catch (Exception e) {
-          e.printStackTrace(LogUtil.getWarnStream(LOG));
+      });
+    if (pt)
+      threads.add(new Thread() {
+        public void run() {
+          try {
+            List<Writable> res = getMapRecords(new Path(segment,
+                ParseText.DIR_NAME), key);
+            results.put("pt", res);
+          } catch (Exception e) {
+            e.printStackTrace(LogUtil.getWarnStream(LOG));
+          }
         }
-      }
-    });
+      });
     Iterator<Thread> it = threads.iterator();
-    while (it.hasNext()) it.next().start();
+    while (it.hasNext())
+      it.next().start();
     int cnt;
     do {
       cnt = 0;
       try {
         Thread.sleep(5000);
-      } catch (Exception e) {};
+      } catch (Exception e) {
+      }
+      ;
       it = threads.iterator();
       while (it.hasNext()) {
-        if (it.next().isAlive()) cnt++;
+        if (it.next().isAlive())
+          cnt++;
       }
       if ((cnt > 0) && (LOG.isDebugEnabled())) {
         LOG.debug("(" + cnt + " to retrieve)");
@@ -376,15 +409,16 @@
       writer.flush();
     }
   }
-  
+
   private List<Writable> getMapRecords(Path dir, Text key) throws Exception {
-    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, dir, getConf());
+    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, dir,
+        getConf());
     ArrayList<Writable> res = new ArrayList<Writable>();
     Class keyClass = readers[0].getKeyClass();
     Class valueClass = readers[0].getValueClass();
     if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
       throw new IOException("Incompatible key (" + keyClass.getName() + ")");
-    Writable value = (Writable)valueClass.newInstance();
+    Writable value = (Writable) valueClass.newInstance();
     // we don't know the partitioning schema
     for (int i = 0; i < readers.length; i++) {
       if (readers[i].get(key, value) != null)
@@ -395,14 +429,15 @@
   }
 
   private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), dir);
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
+        getConf(), dir);
     ArrayList<Writable> res = new ArrayList<Writable>();
     Class keyClass = readers[0].getKeyClass();
     Class valueClass = readers[0].getValueClass();
     if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
       throw new IOException("Incompatible key (" + keyClass.getName() + ")");
-    Writable aKey = (Writable)keyClass.newInstance();
-    Writable value = (Writable)valueClass.newInstance();
+    Writable aKey = (Writable) keyClass.newInstance();
+    Writable value = (Writable) valueClass.newInstance();
     for (int i = 0; i < readers.length; i++) {
       while (readers[i].next(aKey, value)) {
         if (aKey.equals(key))
@@ -422,41 +457,55 @@
     public long parsed = -1L;
     public long parseErrors = -1L;
   }
-  
+
   SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
-  
+
   public void list(List<Path> dirs, Writer writer) throws Exception {
-    writer.write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
+    writer
+        .write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
     for (int i = 0; i < dirs.size(); i++) {
       Path dir = dirs.get(i);
       SegmentReaderStats stats = new SegmentReaderStats();
       getStats(dir, stats);
       writer.write(dir.getName() + "\t");
-      if (stats.generated == -1) writer.write("?");
-      else writer.write(stats.generated + "");
+      if (stats.generated == -1)
+        writer.write("?");
+      else
+        writer.write(stats.generated + "");
       writer.write("\t\t");
-      if (stats.start == -1) writer.write("?\t");
-      else writer.write(sdf.format(new Date(stats.start)));
+      if (stats.start == -1)
+        writer.write("?\t");
+      else
+        writer.write(sdf.format(new Date(stats.start)));
       writer.write("\t");
-      if (stats.end == -1) writer.write("?");
-      else writer.write(sdf.format(new Date(stats.end)));
+      if (stats.end == -1)
+        writer.write("?");
+      else
+        writer.write(sdf.format(new Date(stats.end)));
       writer.write("\t");
-      if (stats.fetched == -1) writer.write("?");
-      else writer.write(stats.fetched + "");
+      if (stats.fetched == -1)
+        writer.write("?");
+      else
+        writer.write(stats.fetched + "");
       writer.write("\t");
-      if (stats.parsed == -1) writer.write("?");
-      else writer.write(stats.parsed + "");
+      if (stats.parsed == -1)
+        writer.write("?");
+      else
+        writer.write(stats.parsed + "");
       writer.write("\n");
       writer.flush();
     }
   }
-  
-  public void getStats(Path segment, final SegmentReaderStats stats) throws Exception {
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
+
+  public void getStats(Path segment, final SegmentReaderStats stats)
+      throws Exception {
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
+        getConf(), new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
     long cnt = 0L;
     Text key = new Text();
     for (int i = 0; i < readers.length; i++) {
-      while (readers[i].next(key)) cnt++;
+      while (readers[i].next(key))
+        cnt++;
       readers[i].close();
     }
     stats.generated = cnt;
@@ -466,12 +515,15 @@
       long start = Long.MAX_VALUE;
       long end = Long.MIN_VALUE;
       CrawlDatum value = new CrawlDatum();
-      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, fetchDir, getConf());
+      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, fetchDir,
+          getConf());
       for (int i = 0; i < mreaders.length; i++) {
         while (mreaders[i].next(key, value)) {
           cnt++;
-          if (value.getFetchTime() < start) start = value.getFetchTime();
-          if (value.getFetchTime() > end) end = value.getFetchTime();
+          if (value.getFetchTime() < start)
+            start = value.getFetchTime();
+          if (value.getFetchTime() > end)
+            end = value.getFetchTime();
         }
         mreaders[i].close();
       }
@@ -484,11 +536,13 @@
       cnt = 0L;
       long errors = 0L;
       ParseData value = new ParseData();
-      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, parseDir, getConf());
+      MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fs, parseDir,
+          getConf());
       for (int i = 0; i < mreaders.length; i++) {
         while (mreaders[i].next(key, value)) {
           cnt++;
-          if (!value.getStatus().isSuccess()) errors++;
+          if (!value.getStatus().isSuccess())
+            errors++;
         }
         mreaders[i].close();
       }
@@ -496,7 +550,7 @@
       stats.parseErrors = errors;
     }
   }
-  
+
   private static final int MODE_DUMP = 0;
 
   private static final int MODE_LIST = 1;
@@ -513,7 +567,8 @@
       mode = MODE_DUMP;
     else if (args[0].equals("-list"))
       mode = MODE_LIST;
-    else if (args[0].equals("-get")) mode = MODE_GET;
+    else if (args[0].equals("-get"))
+      mode = MODE_GET;
 
     boolean co = true;
     boolean fe = true;
@@ -545,63 +600,69 @@
     }
     Configuration conf = NutchConfiguration.create();
     final FileSystem fs = FileSystem.get(conf);
-    SegmentReader segmentReader = new SegmentReader(conf, co, fe, ge, pa, pd, pt);
+    SegmentReader segmentReader = new SegmentReader(conf, co, fe, ge, pa, pd,
+        pt);
     // collect required args
     switch (mode) {
-      case MODE_DUMP:
-        String input = args[1];
-        if (input == null) {
-          System.err.println("Missing required argument: <segment_dir>");
-          usage();
-          return;
-        }
-        String output = args.length > 2 ? args[2] : null;
-        if (output == null) {
-          System.err.println("Missing required argument: <output>");
-          usage();
-          return;
-        }
-        segmentReader.dump(new Path(input), new Path(output));
+    case MODE_DUMP:
+      String input = args[1];
+      if (input == null) {
+        System.err.println("Missing required argument: <segment_dir>");
+        usage();
         return;
-      case MODE_LIST:
-        ArrayList<Path> dirs = new ArrayList<Path>();
-        for (int i = 1; i < args.length; i++) {
-          if (args[i] == null) continue;
-          if (args[i].equals("-dir")) {
-            Path dir = new Path(args[++i]);
-            FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
-            Path[] files = HadoopFSUtil.getPaths(fstats);
-            if (files != null && files.length > 0) {
-              dirs.addAll(Arrays.asList(files));
-            }
-          } else dirs.add(new Path(args[i]));
-        }
-        segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
+      }
+      String output = args.length > 2 ? args[2] : null;
+      if (output == null) {
+        System.err.println("Missing required argument: <output>");
+        usage();
         return;
-      case MODE_GET:
-        input = args[1];
-        if (input == null) {
-          System.err.println("Missing required argument: <segment_dir>");
-          usage();
-          return;
-        }
-        String key = args.length > 2 ? args[2] : null;
-        if (key == null) {
-          System.err.println("Missing required argument: <keyValue>");
-          usage();
-          return;
-        }
-        segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter(System.out, "UTF-8"), new HashMap<String, List<Writable>>());
+      }
+      segmentReader.dump(new Path(input), new Path(output));
+      return;
+    case MODE_LIST:
+      ArrayList<Path> dirs = new ArrayList<Path>();
+      for (int i = 1; i < args.length; i++) {
+        if (args[i] == null)
+          continue;
+        if (args[i].equals("-dir")) {
+          Path dir = new Path(args[++i]);
+          FileStatus[] fstats = fs.listStatus(dir,
+              HadoopFSUtil.getPassDirectoriesFilter(fs));
+          Path[] files = HadoopFSUtil.getPaths(fstats);
+          if (files != null && files.length > 0) {
+            dirs.addAll(Arrays.asList(files));
+          }
+        } else
+          dirs.add(new Path(args[i]));
+      }
+      segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
+      return;
+    case MODE_GET:
+      input = args[1];
+      if (input == null) {
+        System.err.println("Missing required argument: <segment_dir>");
+        usage();
         return;
-      default:
-        System.err.println("Invalid operation: " + args[0]);
+      }
+      String key = args.length > 2 ? args[2] : null;
+      if (key == null) {
+        System.err.println("Missing required argument: <keyValue>");
         usage();
         return;
+      }
+      segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter(
+          System.out, "UTF-8"), new HashMap<String, List<Writable>>());
+      return;
+    default:
+      System.err.println("Invalid operation: " + args[0]);
+      usage();
+      return;
     }
   }
 
   private static void usage() {
-    System.err.println("Usage: SegmentReader (-dump ... | -list ... | -get ...) [general options]\n");
+    System.err
+        .println("Usage: SegmentReader (-dump ... | -list ... | -get ...) [general options]\n");
     System.err.println("* General options:");
     System.err.println("\t-nocontent\tignore content directory");
     System.err.println("\t-nofetch\tignore crawl_fetch directory");
@@ -610,21 +671,32 @@
     System.err.println("\t-noparsedata\tignore parse_data directory");
     System.err.println("\t-noparsetext\tignore parse_text directory");
     System.err.println();
-    System.err.println("* SegmentReader -dump <segment_dir> <output> [general options]");
-    System.err.println("  Dumps content of a <segment_dir> as a text file to <output>.\n");
+    System.err
+        .println("* SegmentReader -dump <segment_dir> <output> [general options]");
+    System.err
+        .println("  Dumps content of a <segment_dir> as a text file to <output>.\n");
     System.err.println("\t<segment_dir>\tname of the segment directory.");
-    System.err.println("\t<output>\tname of the (non-existent) output directory.");
+    System.err
+        .println("\t<output>\tname of the (non-existent) output directory.");
     System.err.println();
-    System.err.println("* SegmentReader -list (<segment_dir1> ... | -dir <segments>) [general options]");
-    System.err.println("  List a synopsis of segments in specified directories, or all segments in");
-    System.err.println("  a directory <segments>, and print it on System.out\n");
-    System.err.println("\t<segment_dir1> ...\tlist of segment directories to process");
-    System.err.println("\t-dir <segments>\t\tdirectory that contains multiple segments");
+    System.err
+        .println("* SegmentReader -list (<segment_dir1> ... | -dir <segments>) [general options]");
+    System.err
+        .println("  List a synopsis of segments in specified directories, or all segments in");
+    System.err
+        .println("  a directory <segments>, and print it on System.out\n");
+    System.err
+        .println("\t<segment_dir1> ...\tlist of segment directories to process");
+    System.err
+        .println("\t-dir <segments>\t\tdirectory that contains multiple segments");
     System.err.println();
-    System.err.println("* SegmentReader -get <segment_dir> <keyValue> [general options]");
-    System.err.println("  Get a specified record from a segment, and print it on System.out.\n");
+    System.err
+        .println("* SegmentReader -get <segment_dir> <keyValue> [general options]");
+    System.err
+        .println("  Get a specified record from a segment, and print it on System.out.\n");
     System.err.println("\t<segment_dir>\tname of the segment directory.");
     System.err.println("\t<keyValue>\tvalue of the key (url).");
-    System.err.println("\t\tNote: put double-quotes around strings with spaces.");
+    System.err
+        .println("\t\tNote: put double-quotes around strings with spaces.");
   }
 }
Index: src/java/org/apache/nutch/segment/SegmentPart.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentPart.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentPart.java	(working copy)
@@ -30,16 +30,16 @@
   public String segmentName;
   /** Name of the segment part (ie. one of subdirectories inside a segment). */
   public String partName;
-  
+
   public SegmentPart() {
-    
+
   }
-  
+
   public SegmentPart(String segmentName, String partName) {
     this.segmentName = segmentName;
     this.partName = partName;
   }
-  
+
   /**
    * Return a String representation of this class, in the form
    * "segmentName/partName".
@@ -47,23 +47,27 @@
   public String toString() {
     return segmentName + "/" + partName;
   }
-  
+
   /**
    * Create SegmentPart from a FileSplit.
+   * 
    * @param split
-   * @return A {@link SegmentPart} resultant from a 
-   * {@link FileSplit}.
+   * @return A {@link SegmentPart} resulting from a {@link FileSplit}.
    * @throws Exception
    */
   public static SegmentPart get(FileSplit split) throws IOException {
     return get(split.getPath().toString());
   }
-  
+
   /**
    * Create SegmentPart from a full path of a location inside any segment part.
-   * @param path full path into a segment part (may include "part-xxxxx" components)
+   * 
+   * @param path
+   *          full path into a segment part (may include "part-xxxxx"
+   *          components)
    * @return SegmentPart instance describing this part.
-   * @throws IOException if any required path components are missing.
+   * @throws IOException
+   *           if any required path components are missing.
    */
   public static SegmentPart get(String path) throws IOException {
     // find part name
@@ -87,12 +91,15 @@
     String segment = dir.substring(idx + 1);
     return new SegmentPart(segment, part);
   }
-  
+
   /**
    * Create SegmentPart from a String in format "segmentName/partName".
-   * @param string input String
+   * 
+   * @param string
+   *          input String
    * @return parsed instance of SegmentPart
-   * @throws IOException if "/" is missing.
+   * @throws IOException
+   *           if "/" is missing.
    */
   public static SegmentPart parse(String string) throws IOException {
     int idx = string.indexOf('/');
Index: src/java/org/apache/nutch/segment/SegmentMergeFilters.java
===================================================================
--- src/java/org/apache/nutch/segment/SegmentMergeFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/SegmentMergeFilters.java	(working copy)
@@ -39,7 +39,8 @@
  * 
  */
 public class SegmentMergeFilters {
-  private static final Logger LOG = LoggerFactory.getLogger(SegmentMergeFilters.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SegmentMergeFilters.class);
   private SegmentMergeFilter[] filters;
 
   public SegmentMergeFilters(Configuration conf) {
@@ -72,9 +73,7 @@
       if (!filter.filter(key, generateData, fetchData, sigData, content,
           parseData, parseText, linked)) {
         if (LOG.isTraceEnabled())
-          LOG
-              .trace("Key " + key + " dropped by "
-                  + filter.getClass().getName());
+          LOG.trace("Key " + key + " dropped by " + filter.getClass().getName());
         return false;
       }
     }
Index: src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java
===================================================================
--- src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/segment/ContentAsTextInputFormat.java	(working copy)
@@ -31,14 +31,14 @@
 
 /**
  * An input format that takes Nutch Content objects and converts them to text
- * while converting newline endings to spaces.  This format is useful for working
+ * while converting newline endings to spaces. This format is useful for working
  * with Nutch content objects in Hadoop Streaming with other languages.
  */
-public class ContentAsTextInputFormat
-  extends SequenceFileInputFormat<Text, Text> {
+public class ContentAsTextInputFormat extends
+    SequenceFileInputFormat<Text, Text> {
 
-  private static class ContentAsTextRecordReader
-    implements RecordReader<Text, Text> {
+  private static class ContentAsTextRecordReader implements
+      RecordReader<Text, Text> {
 
     private final SequenceFileRecordReader<Text, Content> sequenceFileRecordReader;
 
@@ -46,11 +46,11 @@
     private Content innerValue;
 
     public ContentAsTextRecordReader(Configuration conf, FileSplit split)
-      throws IOException {
+        throws IOException {
       sequenceFileRecordReader = new SequenceFileRecordReader<Text, Content>(
-        conf, split);
-      innerKey = (Text)sequenceFileRecordReader.createKey();
-      innerValue = (Content)sequenceFileRecordReader.createValue();
+          conf, split);
+      innerKey = (Text) sequenceFileRecordReader.createKey();
+      innerValue = (Content) sequenceFileRecordReader.createValue();
     }
 
     public Text createKey() {
@@ -61,9 +61,8 @@
       return new Text();
     }
 
-    public synchronized boolean next(Text key, Text value)
-      throws IOException {
-      
+    public synchronized boolean next(Text key, Text value) throws IOException {
+
       // convert the content object to text
       Text tKey = key;
       Text tValue = value;
@@ -72,26 +71,23 @@
       }
       tKey.set(innerKey.toString());
       String contentAsStr = new String(innerValue.getContent());
-      
+
       // replace new line endings with spaces
       contentAsStr = contentAsStr.replaceAll("\n", " ");
       value.set(contentAsStr);
-     
+
       return true;
     }
 
-    public float getProgress()
-      throws IOException {
+    public float getProgress() throws IOException {
       return sequenceFileRecordReader.getProgress();
     }
 
-    public synchronized long getPos()
-      throws IOException {
+    public synchronized long getPos() throws IOException {
       return sequenceFileRecordReader.getPos();
     }
 
-    public synchronized void close()
-      throws IOException {
+    public synchronized void close() throws IOException {
       sequenceFileRecordReader.close();
     }
   }
@@ -101,10 +97,9 @@
   }
 
   public RecordReader<Text, Text> getRecordReader(InputSplit split,
-    JobConf job, Reporter reporter)
-    throws IOException {
+      JobConf job, Reporter reporter) throws IOException {
 
     reporter.setStatus(split.toString());
-    return new ContentAsTextRecordReader(job, (FileSplit)split);
+    return new ContentAsTextRecordReader(job, (FileSplit) split);
   }
 }
Index: src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/WebGraph.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/WebGraph.java	(working copy)
@@ -89,9 +89,7 @@
  * complete before the WebGraph is updated and some type of analysis, such as
  * LinkRank, is run to update scores in the Node database in a stable fashion.
  */
-public class WebGraph
-  extends Configured
-  implements Tool {
+public class WebGraph extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(WebGraph.class);
   public static final String LOCK_NAME = ".locked";
@@ -104,10 +102,9 @@
    * by domain and host can be ignored. The number of Outlinks out to a given
    * page or domain can also be limited.
    */
-  public static class OutlinkDb
-    extends Configured
-    implements Mapper<Text, Writable, Text, LinkDatum>,
-    Reducer<Text, LinkDatum, Text, LinkDatum> {
+  public static class OutlinkDb extends Configured implements
+      Mapper<Text, Writable, Text, LinkDatum>,
+      Reducer<Text, LinkDatum, Text, LinkDatum> {
 
     // ignoring internal domains, internal hosts
     private boolean ignoreDomain = true;
@@ -124,7 +121,8 @@
     /**
      * Normalizes and trims extra whitespace from the given url.
      * 
-     * @param url The url to normalize.
+     * @param url
+     *          The url to normalize.
      * 
      * @return The normalized url.
      */
@@ -136,10 +134,9 @@
 
           // normalize and trim the url
           normalized = urlNormalizers.normalize(url,
-            URLNormalizers.SCOPE_DEFAULT);
+              URLNormalizers.SCOPE_DEFAULT);
           normalized = normalized.trim();
-        }
-        catch (Exception e) {
+        } catch (Exception e) {
           LOG.warn("Skipping " + url + ":" + e);
           normalized = null;
         }
@@ -151,7 +148,8 @@
      * Returns the fetch time from the parse data or the current system time if
      * the fetch time doesn't exist.
      * 
-     * @param data The parse data.
+     * @param data
+     *          The parse data.
      * 
      * @return The fetch time as a long.
      */
@@ -164,8 +162,7 @@
 
         // get the fetch time from the parse data
         fetchTime = Long.parseLong(fetchTimeStr);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         fetchTime = System.currentTimeMillis();
       }
       return fetchTime;
@@ -201,8 +198,8 @@
      * maps out new LinkDatum objects from new crawls ParseData.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       // normalize url, stop processing if null
       String url = normalizeUrl(key.toString());
@@ -214,7 +211,7 @@
 
         // get the parse data and the outlinks from the parse data, along with
         // the fetch time for those links
-        ParseData data = (ParseData)value;
+        ParseData data = (ParseData) value;
         long fetchTime = getFetchTime(data);
         Outlink[] outlinkAr = data.getOutlinks();
         Map<String, String> outlinkMap = new LinkedHashMap<String, String>();
@@ -230,7 +227,7 @@
             // url is existing
             boolean existingUrl = outlinkMap.containsKey(toUrl);
             if (toUrl != null
-              && (!existingUrl || (existingUrl && outlinkMap.get(toUrl) == null))) {
+                && (!existingUrl || (existingUrl && outlinkMap.get(toUrl) == null))) {
               outlinkMap.put(toUrl, outlink.getAnchor());
             }
           }
@@ -242,17 +239,16 @@
           LinkDatum datum = new LinkDatum(outlinkUrl, anchor, fetchTime);
           output.collect(key, datum);
         }
-      }
-      else if (value instanceof LinkDatum) {
+      } else if (value instanceof LinkDatum) {
 
         // collect existing outlinks from existing OutlinkDb
-        output.collect(key, (LinkDatum)value);
+        output.collect(key, (LinkDatum) value);
       }
     }
 
     public void reduce(Text key, Iterator<LinkDatum> values,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       // aggregate all outlinks, get the most recent timestamp for a fetch
       // which should be the timestamp for all of the most recent outlinks
@@ -266,7 +262,7 @@
         if (mostRecent == 0L || mostRecent < timestamp) {
           mostRecent = timestamp;
         }
-        outlinkList.add((LinkDatum)WritableUtils.clone(next, conf));
+        outlinkList.add((LinkDatum) WritableUtils.clone(next, conf));
       }
 
       // get the url, domain, and host for the url
@@ -291,10 +287,11 @@
         // outlinks must be the most recent and conform to internal url and
         // limiting rules, if it does collect it
         if (datum.getTimestamp() == mostRecent
-          && (!limitPages || (limitPages && !pages.contains(toPage)))
-          && (!limitDomains || (limitDomains && !domains.contains(toDomain)))
-          && (!ignoreHost || (ignoreHost && !toHost.equalsIgnoreCase(host)))
-          && (!ignoreDomain || (ignoreDomain && !toDomain.equalsIgnoreCase(domain)))) {
+            && (!limitPages || (limitPages && !pages.contains(toPage)))
+            && (!limitDomains || (limitDomains && !domains.contains(toDomain)))
+            && (!ignoreHost || (ignoreHost && !toHost.equalsIgnoreCase(host)))
+            && (!ignoreDomain || (ignoreDomain && !toDomain
+                .equalsIgnoreCase(domain)))) {
           output.collect(key, datum);
           pages.add(toPage);
           domains.add(toDomain);
@@ -311,9 +308,8 @@
    * OutlinkDb LinkDatum objects and are regenerated each time the WebGraph is
    * updated.
    */
-  private static class InlinkDb
-    extends Configured
-    implements Mapper<Text, LinkDatum, Text, LinkDatum> {
+  private static class InlinkDb extends Configured implements
+      Mapper<Text, LinkDatum, Text, LinkDatum> {
 
     private JobConf conf;
     private long timestamp;
@@ -348,8 +344,8 @@
      * new system timestamp, type and to and from url switched.
      */
     public void map(Text key, LinkDatum datum,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       // get the to and from url and the anchor
       String fromUrl = key.toString();
@@ -367,9 +363,8 @@
    * Creates the Node database which consists of the number of in and outlinks
    * for each url and a score slot for analysis programs such as LinkRank.
    */
-  private static class NodeDb
-    extends Configured
-    implements Reducer<Text, LinkDatum, Text, Node> {
+  private static class NodeDb extends Configured implements
+      Reducer<Text, LinkDatum, Text, Node> {
 
     private JobConf conf;
 
@@ -401,8 +396,8 @@
      * score of 0.0 for each url (node) in the webgraph.
      */
     public void reduce(Text key, Iterator<LinkDatum> values,
-      OutputCollector<Text, Node> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Node> output, Reporter reporter)
+        throws IOException {
 
       Node node = new Node();
       int numInlinks = 0;
@@ -413,8 +408,7 @@
         LinkDatum next = values.next();
         if (next.getLinkType() == LinkDatum.INLINK) {
           numInlinks++;
-        }
-        else if (next.getLinkType() == LinkDatum.OUTLINK) {
+        } else if (next.getLinkType() == LinkDatum.OUTLINK) {
           numOutlinks++;
         }
       }
@@ -432,14 +426,17 @@
    * Node. If a current WebGraph exists then it is updated, if it doesn't exist
    * then a new WebGraph database is created.
    * 
-   * @param webGraphDb The WebGraph to create or update.
-   * @param segments The array of segments used to update the WebGraph. Newer
-   * segments and fetch times will overwrite older segments.
+   * @param webGraphDb
+   *          The WebGraph to create or update.
+   * @param segments
+   *          The array of segments used to update the WebGraph. Newer segments
+   *          and fetch times will overwrite older segments.
    * 
-   * @throws IOException If an error occurs while processing the WebGraph.
+   * @throws IOException
+   *           If an error occurs while processing the WebGraph.
    */
   public void createWebGraph(Path webGraphDb, Path[] segments)
-    throws IOException {
+      throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -467,7 +464,7 @@
     }
 
     Path tempOutlinkDb = new Path(outlinkDb + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
     JobConf outlinkJob = new NutchJob(conf);
     outlinkJob.setJobName("Outlinkdb: " + outlinkDb);
 
@@ -503,9 +500,8 @@
       LOG.info("OutlinkDb: installing " + outlinkDb);
       FSUtils.replace(fs, outlinkDb, tempOutlinkDb, true);
       LOG.info("OutlinkDb: finished");
-    }
-    catch (IOException e) {
-      
+    } catch (IOException e) {
+
       // remove lock file and and temporary directory if an error occurs
       LockUtil.removeLockFile(fs, lock);
       if (fs.exists(tempOutlinkDb)) {
@@ -518,7 +514,7 @@
     // inlink and temp link database paths
     Path inlinkDb = new Path(webGraphDb, INLINK_DIR);
     Path tempInlinkDb = new Path(inlinkDb + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf inlinkJob = new NutchJob(conf);
     inlinkJob.setJobName("Inlinkdb " + inlinkDb);
@@ -534,21 +530,20 @@
     inlinkJob.setOutputFormat(MapFileOutputFormat.class);
 
     try {
-      
+
       // run the inlink and replace any old with new
       LOG.info("InlinkDb: running");
       JobClient.runJob(inlinkJob);
       LOG.info("InlinkDb: installing " + inlinkDb);
       FSUtils.replace(fs, inlinkDb, tempInlinkDb, true);
       LOG.info("InlinkDb: finished");
-    }
-    catch (IOException e) {
-      
+    } catch (IOException e) {
+
       // remove lock file and and temporary directory if an error occurs
       LockUtil.removeLockFile(fs, lock);
       if (fs.exists(tempInlinkDb)) {
         fs.delete(tempInlinkDb, true);
-      }      
+      }
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -556,7 +551,7 @@
     // node and temp node database paths
     Path nodeDb = new Path(webGraphDb, NODE_DIR);
     Path tempNodeDb = new Path(nodeDb + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf nodeJob = new NutchJob(conf);
     nodeJob.setJobName("NodeDb " + nodeDb);
@@ -574,21 +569,20 @@
     nodeJob.setOutputFormat(MapFileOutputFormat.class);
 
     try {
-      
+
       // run the node job and replace old nodedb with new
       LOG.info("NodeDb: running");
       JobClient.runJob(nodeJob);
       LOG.info("NodeDb: installing " + nodeDb);
       FSUtils.replace(fs, nodeDb, tempNodeDb, true);
       LOG.info("NodeDb: finished");
-    }
-    catch (IOException e) {
-      
+    } catch (IOException e) {
+
       // remove lock file and and temporary directory if an error occurs
       LockUtil.removeLockFile(fs, lock);
       if (fs.exists(tempNodeDb)) {
         fs.delete(tempNodeDb, true);
-      }      
+      }
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -597,11 +591,11 @@
     LockUtil.removeLockFile(fs, lock);
 
     long end = System.currentTimeMillis();
-    LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new WebGraph(), args);
     System.exit(res);
   }
@@ -609,18 +603,17 @@
   /**
    * Parses command link arguments and runs the WebGraph jobs.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph database to use").create("webgraphdb");
-    Option segOpts = OptionBuilder.withArgName("segment").hasArgs().withDescription(
-      "the segment(s) to use").create("segment");
-    Option segDirOpts = OptionBuilder.withArgName("segmentDir").hasArgs().withDescription(
-      "the segment directory to use").create("segmentDir");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph database to use").create("webgraphdb");
+    Option segOpts = OptionBuilder.withArgName("segment").hasArgs()
+        .withDescription("the segment(s) to use").create("segment");
+    Option segDirOpts = OptionBuilder.withArgName("segmentDir").hasArgs()
+        .withDescription("the segment directory to use").create("segmentDir");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     options.addOption(segOpts);
@@ -631,8 +624,7 @@
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || (!line.hasOption("segment") && !line.hasOption("segmentDir"))
-) {
+          || (!line.hasOption("segment") && !line.hasOption("segmentDir"))) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraph", options);
         return -1;
@@ -655,14 +647,14 @@
       if (line.hasOption("segmentDir")) {
         Path dir = new Path(line.getOptionValue("segmentDir"));
         FileSystem fs = dir.getFileSystem(getConf());
-        FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] fstats = fs.listStatus(dir,
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         segPaths = HadoopFSUtil.getPaths(fstats);
       }
 
       createWebGraph(new Path(webGraphDb), segPaths);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("WebGraph: " + StringUtils.stringifyException(e));
       return -2;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java	(working copy)
@@ -58,13 +58,12 @@
 
 /**
  * Updates the score from the WebGraph node database into the crawl database.
- * Any score that is not in the node database is set to the clear score in the 
+ * Any score that is not in the node database is set to the clear score in the
  * crawl database.
  */
-public class ScoreUpdater
-  extends Configured
-  implements Tool, Mapper<Text, Writable, Text, ObjectWritable>,
-  Reducer<Text, ObjectWritable, Text, CrawlDatum> {
+public class ScoreUpdater extends Configured implements Tool,
+    Mapper<Text, Writable, Text, ObjectWritable>,
+    Reducer<Text, ObjectWritable, Text, CrawlDatum> {
 
   public static final Logger LOG = LoggerFactory.getLogger(ScoreUpdater.class);
 
@@ -80,8 +79,8 @@
    * Changes input into ObjectWritables.
    */
   public void map(Text key, Writable value,
-    OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+      throws IOException {
 
     ObjectWritable objWrite = new ObjectWritable();
     objWrite.set(value);
@@ -93,8 +92,8 @@
    * with a cleared score.
    */
   public void reduce(Text key, Iterator<ObjectWritable> values,
-    OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     String url = key.toString();
     Node node = null;
@@ -106,34 +105,31 @@
       ObjectWritable next = values.next();
       Object value = next.get();
       if (value instanceof Node) {
-        node = (Node)value;
+        node = (Node) value;
+      } else if (value instanceof CrawlDatum) {
+        datum = (CrawlDatum) value;
       }
-      else if (value instanceof CrawlDatum) {
-        datum = (CrawlDatum)value;
-      }
     }
 
-    // datum should never be null, could happen if somehow the url was 
+    // datum should never be null, could happen if somehow the url was
     // normalized or changed after being pulled from the crawldb
     if (datum != null) {
 
       if (node != null) {
-        
+
         // set the inlink score in the nodedb
         float inlinkScore = node.getInlinkScore();
         datum.setScore(inlinkScore);
         LOG.debug(url + ": setting to score " + inlinkScore);
-      }
-      else {
-        
+      } else {
+
         // clear out the score in the crawldb
         datum.setScore(clearScore);
         LOG.debug(url + ": setting to clear score of " + clearScore);
       }
 
       output.collect(key, datum);
-    }
-    else {
+    } else {
       LOG.debug(url + ": no datum");
     }
   }
@@ -142,16 +138,18 @@
   }
 
   /**
-   * Updates the inlink score in the web graph node databsae into the crawl 
+   * Updates the inlink score in the web graph node database into the crawl
    * database.
    * 
-   * @param crawlDb The crawl database to update
-   * @param webGraphDb The webgraph database to use.
+   * @param crawlDb
+   *          The crawl database to update
+   * @param webGraphDb
+   *          The webgraph database to use.
    * 
-   * @throws IOException If an error occurs while updating the scores.
+   * @throws IOException
+   *           If an error occurs while updating the scores.
    */
-  public void update(Path crawlDb, Path webGraphDb)
-    throws IOException {
+  public void update(Path crawlDb, Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -164,8 +162,8 @@
     LOG.info("Running crawldb update " + crawlDb);
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Path crawlDbCurrent = new Path(crawlDb, CrawlDb.CURRENT_NAME);
-    Path newCrawlDb = new Path(crawlDb,
-      Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random()
+        .nextInt(Integer.MAX_VALUE)));
 
     // run the updater job outputting to the temp crawl database
     JobConf updater = new NutchJob(conf);
@@ -184,10 +182,9 @@
 
     try {
       JobClient.runJob(updater);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
-      
+
       // remove the temp crawldb on error
       if (fs.exists(newCrawlDb)) {
         fs.delete(newCrawlDb, true);
@@ -200,29 +197,28 @@
     CrawlDb.install(updater, crawlDb);
 
     long end = System.currentTimeMillis();
-    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new ScoreUpdater(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
    * Runs the ScoreUpdater tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option crawlDbOpts = OptionBuilder.withArgName("crawldb").hasArg().withDescription(
-      "the crawldb to use").create("crawldb");
-    Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the webgraphdb to use").create("webgraphdb");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option crawlDbOpts = OptionBuilder.withArgName("crawldb").hasArg()
+        .withDescription("the crawldb to use").create("crawldb");
+    Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the webgraphdb to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(crawlDbOpts);
     options.addOption(webGraphOpts);
@@ -232,7 +228,7 @@
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("crawldb")) {
+          || !line.hasOption("crawldb")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("ScoreUpdater", options);
         return -1;
@@ -242,8 +238,7 @@
       String webGraphDb = line.getOptionValue("webgraphdb");
       update(new Path(crawlDb), new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ScoreUpdater: " + StringUtils.stringifyException(e));
       return -1;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java	(working copy)
@@ -61,16 +61,12 @@
  * have been run. For link analysis score a program such as LinkRank will need
  * to have been run which updates the NodeDb of the WebGraph.
  */
-public class NodeDumper
-  extends Configured
-  implements Tool {
+public class NodeDumper extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(NodeDumper.class);
 
   private static enum DumpType {
-    INLINKS,
-    OUTLINKS,
-    SCORES
+    INLINKS, OUTLINKS, SCORES
   }
 
   /**
@@ -78,10 +74,9 @@
    * on the command line, the top urls could be for number of inlinks, for
    * number of outlinks, or for link analysis score.
    */
-  public static class Sorter
-    extends Configured
-    implements Mapper<Text, Node, FloatWritable, Text>,
-    Reducer<FloatWritable, Text, Text, FloatWritable> {
+  public static class Sorter extends Configured implements
+      Mapper<Text, Node, FloatWritable, Text>,
+      Reducer<FloatWritable, Text, Text, FloatWritable> {
 
     private JobConf conf;
     private boolean inlinks = false;
@@ -109,17 +104,15 @@
      * score.
      */
     public void map(Text key, Node node,
-      OutputCollector<FloatWritable, Text> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
 
       float number = 0;
       if (inlinks) {
         number = node.getNumInlinks();
-      }
-      else if (outlinks) {
+      } else if (outlinks) {
         number = node.getNumOutlinks();
-      }
-      else {
+      } else {
         number = node.getInlinkScore();
       }
 
@@ -131,8 +124,8 @@
      * Flips and collects the url and numeric sort value.
      */
     public void reduce(FloatWritable key, Iterator<Text> values,
-      OutputCollector<Text, FloatWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, FloatWritable> output, Reporter reporter)
+        throws IOException {
 
       // take the negative of the negative to get original value, sometimes 0
       // value are a little weird
@@ -142,7 +135,7 @@
 
       // collect all values, this time with the url as key
       while (values.hasNext() && (numCollected < topn)) {
-        Text url = (Text)WritableUtils.clone(values.next(), conf);
+        Text url = (Text) WritableUtils.clone(values.next(), conf);
         output.collect(url, number);
         numCollected++;
       }
@@ -152,15 +145,17 @@
   /**
    * Runs the process to dump the top urls out to a text file.
    * 
-   * @param webGraphDb The WebGraph from which to pull values.
+   * @param webGraphDb
+   *          The WebGraph from which to pull values.
    * 
    * @param topN
    * @param output
    * 
-   * @throws IOException If an error occurs while dumping the top values.
+   * @throws IOException
+   *           If an error occurs while dumping the top values.
    */
-  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff)
-    throws IOException {
+  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output,
+      boolean asEff) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -194,45 +189,44 @@
     try {
       LOG.info("NodeDumper: running");
       JobClient.runJob(dumper);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
     long end = System.currentTimeMillis();
-    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new NodeDumper(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
    * Runs the node dumper tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph database to use").create("webgraphdb");
-    Option inlinkOpts = OptionBuilder.withArgName("inlinks").withDescription(
-      "show highest inlinks").create("inlinks");
-    Option outlinkOpts = OptionBuilder.withArgName("outlinks").withDescription(
-      "show highest outlinks").create("outlinks");
-    Option scoreOpts = OptionBuilder.withArgName("scores").withDescription(
-      "show highest scores").create("scores");
-    Option topNOpts = OptionBuilder.withArgName("topn").hasOptionalArg().withDescription(
-      "show topN scores").create("topn");
-    Option outputOpts = OptionBuilder.withArgName("output").hasArg().withDescription(
-      "the output directory to use").create("output");
-    Option effOpts = OptionBuilder.withArgName("asEff").withDescription(
-      "Solr ExternalFileField compatible output format").create("asEff");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph database to use").create("webgraphdb");
+    Option inlinkOpts = OptionBuilder.withArgName("inlinks")
+        .withDescription("show highest inlinks").create("inlinks");
+    Option outlinkOpts = OptionBuilder.withArgName("outlinks")
+        .withDescription("show highest outlinks").create("outlinks");
+    Option scoreOpts = OptionBuilder.withArgName("scores")
+        .withDescription("show highest scores").create("scores");
+    Option topNOpts = OptionBuilder.withArgName("topn").hasOptionalArg()
+        .withDescription("show topN scores").create("topn");
+    Option outputOpts = OptionBuilder.withArgName("output").hasArg()
+        .withDescription("the output directory to use").create("output");
+    Option effOpts = OptionBuilder.withArgName("asEff")
+        .withDescription("Solr ExternalFileField compatible output format")
+        .create("asEff");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     options.addOption(inlinkOpts);
@@ -256,21 +250,20 @@
       boolean inlinks = line.hasOption("inlinks");
       boolean outlinks = line.hasOption("outlinks");
       boolean scores = line.hasOption("scores");
-      long topN = (line.hasOption("topn")
-        ? Long.parseLong(line.getOptionValue("topn")) : Long.MAX_VALUE);
+      long topN = (line.hasOption("topn") ? Long.parseLong(line
+          .getOptionValue("topn")) : Long.MAX_VALUE);
 
       // get the correct dump type
       String output = line.getOptionValue("output");
-      DumpType type = (inlinks ? DumpType.INLINKS : outlinks
-        ? DumpType.OUTLINKS : DumpType.SCORES);
+      DumpType type = (inlinks ? DumpType.INLINKS
+          : outlinks ? DumpType.OUTLINKS : DumpType.SCORES);
 
       // Use ExternalFileField?
       boolean asEff = line.hasOption("asEff");
 
       dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("NodeDumper: " + StringUtils.stringifyException(e));
       return -2;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/Node.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/Node.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/Node.java	(working copy)
@@ -25,12 +25,11 @@
 
 /**
  * A class which holds the number of inlinks and outlinks for a given url along
- * with an inlink score from a link analysis program and any metadata.  
+ * with an inlink score from a link analysis program and any metadata.
  * 
  * The Node is the core unit of the NodeDb in the WebGraph.
  */
-public class Node
-  implements Writable {
+public class Node implements Writable {
 
   private int numInlinks = 0;
   private int numOutlinks = 0;
@@ -77,8 +76,7 @@
     this.metadata = metadata;
   }
 
-  public void readFields(DataInput in)
-    throws IOException {
+  public void readFields(DataInput in) throws IOException {
 
     numInlinks = in.readInt();
     numOutlinks = in.readInt();
@@ -87,8 +85,7 @@
     metadata.readFields(in);
   }
 
-  public void write(DataOutput out)
-    throws IOException {
+  public void write(DataOutput out) throws IOException {
 
     out.writeInt(numInlinks);
     out.writeInt(numOutlinks);
@@ -98,8 +95,8 @@
 
   public String toString() {
     return "num inlinks: " + numInlinks + ", num outlinks: " + numOutlinks
-      + ", inlink score: " + inlinkScore + ", outlink score: "
-      + getOutlinkScore() + ", metadata: " + metadata.toString();
+        + ", inlink score: " + inlinkScore + ", outlink score: "
+        + getOutlinkScore() + ", metadata: " + metadata.toString();
   }
 
 }
Index: src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java	(working copy)
@@ -27,8 +27,7 @@
  * A class for holding link information including the url, anchor text, a score,
  * the timestamp of the link and a link type.
  */
-public class LinkDatum
-  implements Writable {
+public class LinkDatum implements Writable {
 
   public final static byte INLINK = 1;
   public final static byte OUTLINK = 2;
@@ -49,7 +48,8 @@
   /**
    * Creates a LinkDatum with a given url. Timestamp is set to current time.
    * 
-   * @param url The link url.
+   * @param url
+   *          The link url.
    */
   public LinkDatum(String url) {
     this(url, "", System.currentTimeMillis());
@@ -59,8 +59,10 @@
    * Creates a LinkDatum with a url and an anchor text. Timestamp is set to
    * current time.
    * 
-   * @param url The link url.
-   * @param anchor The link anchor text.
+   * @param url
+   *          The link url.
+   * @param anchor
+   *          The link anchor text.
    */
   public LinkDatum(String url, String anchor) {
     this(url, anchor, System.currentTimeMillis());
@@ -112,8 +114,7 @@
     this.linkType = linkType;
   }
 
-  public void readFields(DataInput in)
-    throws IOException {
+  public void readFields(DataInput in) throws IOException {
     url = Text.readString(in);
     anchor = Text.readString(in);
     score = in.readFloat();
@@ -121,8 +122,7 @@
     linkType = in.readByte();
   }
 
-  public void write(DataOutput out)
-    throws IOException {
+  public void write(DataOutput out) throws IOException {
     Text.writeString(out, url);
     Text.writeString(out, anchor != null ? anchor : "");
     out.writeFloat(score);
@@ -132,9 +132,9 @@
 
   public String toString() {
 
-    String type = (linkType == INLINK ? "inlink" : (linkType == OUTLINK)
-      ? "outlink" : "unknown");
+    String type = (linkType == INLINK ? "inlink"
+        : (linkType == OUTLINK) ? "outlink" : "unknown");
     return "url: " + url + ", anchor: " + anchor + ", score: " + score
-      + ", timestamp: " + timestamp + ", link type: " + type;
+        + ", timestamp: " + timestamp + ", link type: " + type;
   }
 }
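
As a quick illustration of the value type reformatted above, a small sketch that uses only the constructors, getUrl() and toString() visible in this hunk; the url and anchor text are placeholders.

import org.apache.nutch.scoring.webgraph.LinkDatum;

public class LinkDatumExample {
  public static void main(String[] args) {
    // The two-argument constructor stamps the link with the current time.
    LinkDatum outlink = new LinkDatum("http://www.example.com/page",
        "Example anchor");
    System.out.println(outlink.getUrl());
    // toString() (reformatted above) reports url, anchor, score, timestamp
    // and the link type.
    System.out.println(outlink);
  }
}
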
Index: src/java/org/apache/nutch/scoring/webgraph/NodeReader.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/NodeReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/NodeReader.java	(working copy)
@@ -37,7 +37,7 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * Reads and prints to system out information for a single node from the NodeDb 
+ * Reads and prints to system out information for a single node from the NodeDb
  * in the WebGraph.
  */
 public class NodeReader extends Configured {
@@ -46,33 +46,35 @@
   private MapFile.Reader[] nodeReaders;
 
   public NodeReader() {
-    
+
   }
-  
+
   public NodeReader(Configuration conf) {
     super(conf);
   }
-  
+
   /**
    * Prints the content of the Node represented by the url to system out.
    * 
-   * @param webGraphDb The webgraph from which to get the node.
-   * @param url The url of the node.
+   * @param webGraphDb
+   *          The webgraph from which to get the node.
+   * @param url
+   *          The url of the node.
    * 
-   * @throws IOException If an error occurs while getting the node.
+   * @throws IOException
+   *           If an error occurs while getting the node.
    */
-  public void dumpUrl(Path webGraphDb, String url)
-    throws IOException {
+  public void dumpUrl(Path webGraphDb, String url) throws IOException {
 
     fs = FileSystem.get(getConf());
     nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
-      WebGraph.NODE_DIR), getConf());
+        WebGraph.NODE_DIR), getConf());
 
     // open the readers, get the node, print out the info, and close the readers
     Text key = new Text(url);
     Node node = new Node();
     MapFileOutputFormat.getEntry(nodeReaders,
-      new HashPartitioner<Text, Node>(), key, node);
+        new HashPartitioner<Text, Node>(), key, node);
     System.out.println(url + ":");
     System.out.println("  inlink score: " + node.getInlinkScore());
     System.out.println("  outlink score: " + node.getOutlinkScore());
@@ -82,20 +84,19 @@
   }
 
   /**
-   * Runs the NodeReader tool.  The command line arguments must contain a 
-   * webgraphdb path and a url.  The url must match the normalized url that is
+   * Runs the NodeReader tool. The command line arguments must contain a
+   * webgraphdb path and a url. The url must match the normalized url that is
    * contained in the NodeDb of the WebGraph.
    */
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
     Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
-      .withDescription("the webgraphdb to use").create("webgraphdb");
+        .withDescription("the webgraphdb to use").create("webgraphdb");
     Option urlOpts = OptionBuilder.withArgName("url").hasOptionalArg()
-      .withDescription("the url to dump").create("url");
+        .withDescription("the url to dump").create("url");
     options.addOption(helpOpts);
     options.addOption(webGraphOpts);
     options.addOption(urlOpts);
@@ -106,7 +107,7 @@
       // command line must take a webgraphdb and a url
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("url")) {
+          || !line.hasOption("url")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraphReader", options);
         return;
@@ -117,10 +118,9 @@
       String url = line.getOptionValue("url");
       NodeReader reader = new NodeReader(NutchConfiguration.create());
       reader.dumpUrl(new Path(webGraphDb), url);
-      
+
       return;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       e.printStackTrace();
       return;
     }
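
The NodeReader main() above boils down to two calls; a standalone sketch follows. The webgraphdb path and url are placeholders, and the url must match the normalized url stored in the NodeDb.

import org.apache.hadoop.fs.Path;
import org.apache.nutch.scoring.webgraph.NodeReader;
import org.apache.nutch.util.NutchConfiguration;

public class NodeReaderExample {
  public static void main(String[] args) throws Exception {
    // Same calls NodeReader.main() makes after option parsing: prints the
    // inlink/outlink scores and counts for one node in the WebGraph NodeDb.
    NodeReader reader = new NodeReader(NutchConfiguration.create());
    reader.dumpUrl(new Path("crawl/webgraphdb"), "http://www.example.com/");
  }
}
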
Index: src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LinkRank.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LinkRank.java	(working copy)
@@ -68,9 +68,7 @@
 import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
-public class LinkRank
-  extends Configured
-  implements Tool {
+public class LinkRank extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkRank.class);
   private static final String NUM_NODES = "_num_nodes_";
@@ -79,14 +77,16 @@
    * Runs the counter job. The counter job determines the number of links in the
    * webgraph. This is used during analysis.
    * 
-   * @param fs The job file system.
-   * @param webGraphDb The web graph database to use.
+   * @param fs
+   *          The job file system.
+   * @param webGraphDb
+   *          The web graph database to use.
    * 
    * @return The number of nodes in the web graph.
-   * @throws IOException If an error occurs while running the counter job.
+   * @throws IOException
+   *           If an error occurs while running the counter job.
    */
-  private int runCounter(FileSystem fs, Path webGraphDb)
-    throws IOException {
+  private int runCounter(FileSystem fs, Path webGraphDb) throws IOException {
 
     // configure the counter job
     Path numLinksPath = new Path(webGraphDb, NUM_NODES);
@@ -110,8 +110,7 @@
     LOG.info("Starting link counter job");
     try {
       JobClient.runJob(counter);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -124,13 +123,13 @@
     BufferedReader buffer = new BufferedReader(new InputStreamReader(readLinks));
     String numLinksLine = buffer.readLine();
     readLinks.close();
-    
+
     // check if there are links to process, if none, webgraph might be empty
     if (numLinksLine == null || numLinksLine.length() == 0) {
       fs.delete(numLinksPath, true);
       throw new IOException("No links to process, is the webgraph empty?");
     }
-    
+
     // delete temp file and convert and return the number of links as an int
     LOG.info("Deleting numlinks temp file");
     fs.delete(numLinksPath, true);
@@ -142,13 +141,15 @@
    * Runs the initializer job. The initializer job sets up the nodes with a
    * default starting score for link analysis.
    * 
-   * @param nodeDb The node database to use.
-   * @param output The job output directory.
+   * @param nodeDb
+   *          The node database to use.
+   * @param output
+   *          The job output directory.
    * 
-   * @throws IOException If an error occurs while running the initializer job.
+   * @throws IOException
+   *           If an error occurs while running the initializer job.
    */
-  private void runInitializer(Path nodeDb, Path output)
-    throws IOException {
+  private void runInitializer(Path nodeDb, Path output) throws IOException {
 
     // configure the initializer
     JobConf initializer = new NutchJob(getConf());
@@ -167,8 +168,7 @@
     LOG.info("Starting initialization job");
     try {
       JobClient.runJob(initializer);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -184,15 +184,20 @@
    * space requirements but it can be very useful in weeding out and eliminating
    * link farms and other spam pages.
    * 
-   * @param nodeDb The node database to use.
-   * @param outlinkDb The outlink database to use.
-   * @param loopDb The loop database to use if it exists.
-   * @param output The output directory.
+   * @param nodeDb
+   *          The node database to use.
+   * @param outlinkDb
+   *          The outlink database to use.
+   * @param loopDb
+   *          The loop database to use if it exists.
+   * @param output
+   *          The output directory.
    * 
-   * @throws IOException If an error occurs while running the inverter job.
+   * @throws IOException
+   *           If an error occurs while running the inverter job.
    */
   private void runInverter(Path nodeDb, Path outlinkDb, Path loopDb, Path output)
-    throws IOException {
+      throws IOException {
 
     // configure the inverter
     JobConf inverter = new NutchJob(getConf());
@@ -218,8 +223,7 @@
     LOG.info("Starting inverter job");
     try {
       JobClient.runJob(inverter);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -233,23 +237,28 @@
    * Typically the link analysis job is run a number of times to allow the link
    * rank scores to converge.
    * 
-   * @param nodeDb The node database from which we are getting previous link
-   * rank scores.
-   * @param inverted The inverted inlinks
-   * @param output The link analysis output.
-   * @param iteration The current iteration number.
-   * @param numIterations The total number of link analysis iterations
+   * @param nodeDb
+   *          The node database from which we are getting previous link rank
+   *          scores.
+   * @param inverted
+   *          The inverted inlinks
+   * @param output
+   *          The link analysis output.
+   * @param iteration
+   *          The current iteration number.
+   * @param numIterations
+   *          The total number of link analysis iterations
    * 
-   * @throws IOException If an error occurs during link analysis.
+   * @throws IOException
+   *           If an error occurs during link analysis.
    */
   private void runAnalysis(Path nodeDb, Path inverted, Path output,
-    int iteration, int numIterations, float rankOne)
-    throws IOException {
+      int iteration, int numIterations, float rankOne) throws IOException {
 
     JobConf analyzer = new NutchJob(getConf());
     analyzer.set("link.analyze.iteration", String.valueOf(iteration + 1));
     analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
-      + " of " + numIterations);
+        + " of " + numIterations);
     FileInputFormat.addInputPath(analyzer, nodeDb);
     FileInputFormat.addInputPath(analyzer, inverted);
     FileOutputFormat.setOutputPath(analyzer, output);
@@ -266,8 +275,7 @@
     LOG.info("Starting analysis job");
     try {
       JobClient.runJob(analyzer);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -279,9 +287,9 @@
    * This is used to determine a rank one score for pages with zero inlinks but
    * that contain outlinks.
    */
-  private static class Counter
-    implements Mapper<Text, Node, Text, LongWritable>,
-    Reducer<Text, LongWritable, Text, LongWritable> {
+  private static class Counter implements
+      Mapper<Text, Node, Text, LongWritable>,
+      Reducer<Text, LongWritable, Text, LongWritable> {
 
     private JobConf conf;
     private static Text numNodes = new Text(NUM_NODES);
@@ -295,8 +303,8 @@
      * Outputs one for every node.
      */
     public void map(Text key, Node value,
-      OutputCollector<Text, LongWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
       output.collect(numNodes, one);
     }
 
@@ -304,8 +312,8 @@
      * Totals the node number and outputs a single total value.
      */
     public void reduce(Text key, Iterator<LongWritable> values,
-      OutputCollector<Text, LongWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
 
       long total = 0;
       while (values.hasNext()) {
@@ -318,8 +326,7 @@
     }
   }
 
-  private static class Initializer
-    implements Mapper<Text, Node, Text, Node> {
+  private static class Initializer implements Mapper<Text, Node, Text, Node> {
 
     private JobConf conf;
     private float initialScore = 1.0f;
@@ -330,11 +337,10 @@
     }
 
     public void map(Text key, Node node, OutputCollector<Text, Node> output,
-      Reporter reporter)
-      throws IOException {
+        Reporter reporter) throws IOException {
 
       String url = key.toString();
-      Node outNode = (Node)WritableUtils.clone(node, conf);
+      Node outNode = (Node) WritableUtils.clone(node, conf);
       outNode.setInlinkScore(initialScore);
 
       output.collect(new Text(url), outNode);
@@ -349,9 +355,9 @@
    * WebGraph. The link analysis process consists of inverting, analyzing and
    * scoring, in a loop for a given number of iterations.
    */
-  private static class Inverter
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, LinkDatum> {
+  private static class Inverter implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, LinkDatum> {
 
     private JobConf conf;
 
@@ -363,8 +369,8 @@
      * Convert values to ObjectWritable
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -377,8 +383,8 @@
      * within the loopset.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       String fromUrl = key.toString();
       List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
@@ -390,14 +396,12 @@
         ObjectWritable write = values.next();
         Object obj = write.get();
         if (obj instanceof Node) {
-          node = (Node)obj;
+          node = (Node) obj;
+        } else if (obj instanceof LinkDatum) {
+          outlinks.add((LinkDatum) WritableUtils.clone((LinkDatum) obj, conf));
+        } else if (obj instanceof LoopSet) {
+          loops = (LoopSet) obj;
         }
-        else if (obj instanceof LinkDatum) {
-          outlinks.add((LinkDatum)WritableUtils.clone((LinkDatum)obj, conf));
-        }
-        else if (obj instanceof LoopSet) {
-          loops = (LoopSet)obj;
-        }
       }
 
       // get the number of outlinks and the current inlink and outlink scores
@@ -418,7 +422,7 @@
           // remove any url that is contained in the loopset
           if (loopSet != null && loopSet.contains(toUrl)) {
             LOG.debug(fromUrl + ": Skipping inverting inlink from loop "
-              + toUrl);
+                + toUrl);
             continue;
           }
           outlink.setUrl(fromUrl);
@@ -427,8 +431,8 @@
           // collect the inverted outlink
           output.collect(new Text(toUrl), outlink);
           LOG.debug(toUrl + ": inverting inlink from " + fromUrl
-            + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
-            + " inlinkscore: " + outlinkScore);
+              + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
+              + " inlinkscore: " + outlinkScore);
         }
       }
     }
@@ -440,9 +444,9 @@
   /**
    * Runs a single link analysis iteration.
    */
-  private static class Analyzer
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Node> {
+  private static class Analyzer implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Node> {
 
     private JobConf conf;
     private float dampingFactor = 0.85f;
@@ -459,13 +463,13 @@
 
       try {
         this.conf = conf;
-        this.dampingFactor = conf.getFloat("link.analyze.damping.factor", 0.85f);
+        this.dampingFactor = conf
+            .getFloat("link.analyze.damping.factor", 0.85f);
         this.rankOne = conf.getFloat("link.analyze.rank.one", 0.0f);
         this.itNum = conf.getInt("link.analyze.iteration", 0);
         limitPages = conf.getBoolean("link.ignore.limit.page", true);
         limitDomains = conf.getBoolean("link.ignore.limit.domain", true);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         LOG.error(StringUtils.stringifyException(e));
         throw new IllegalArgumentException(e);
       }
@@ -475,8 +479,8 @@
      * Convert values to ObjectWritable
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(WritableUtils.clone(value, conf));
@@ -488,8 +492,8 @@
      * stored in a temporary NodeDb which replaces the NodeDb of the WebGraph.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Node> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Node> output, Reporter reporter)
+        throws IOException {
 
       String url = key.toString();
       Set<String> domains = new HashSet<String>();
@@ -505,11 +509,10 @@
         ObjectWritable next = values.next();
         Object value = next.get();
         if (value instanceof Node) {
-          node = (Node)value;
-        }
-        else if (value instanceof LinkDatum) {
+          node = (Node) value;
+        } else if (value instanceof LinkDatum) {
 
-          LinkDatum linkDatum = (LinkDatum)value;
+          LinkDatum linkDatum = (LinkDatum) value;
           float scoreFromInlink = linkDatum.getScore();
           String inlinkUrl = linkDatum.getUrl();
           String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
@@ -517,9 +520,9 @@
 
           // limit counting duplicate inlinks by pages or domains
           if ((limitPages && pages.contains(inLinkPage))
-            || (limitDomains && domains.contains(inLinkDomain))) {
+              || (limitDomains && domains.contains(inLinkDomain))) {
             LOG.debug(url + ": ignoring " + scoreFromInlink + " from "
-              + inlinkUrl + ", duplicate page or domain");
+                + inlinkUrl + ", duplicate page or domain");
             continue;
           }
 
@@ -529,25 +532,24 @@
           domains.add(inLinkDomain);
           pages.add(inLinkPage);
           LOG.debug(url + ": adding " + scoreFromInlink + " from " + inlinkUrl
-            + ", total: " + totalInlinkScore);
+              + ", total: " + totalInlinkScore);
         }
       }
 
       // calculate linkRank score formula
       float linkRankScore = (1 - this.dampingFactor)
-        + (this.dampingFactor * totalInlinkScore);
+          + (this.dampingFactor * totalInlinkScore);
 
       LOG.info(url + ": score: " + linkRankScore + " num inlinks: "
-        + numInlinks + " iteration: " + itNum);
+          + numInlinks + " iteration: " + itNum);
 
       // store the score in a temporary NodeDb
-      Node outNode = (Node)WritableUtils.clone(node, conf);
+      Node outNode = (Node) WritableUtils.clone(node, conf);
       outNode.setInlinkScore(linkRankScore);
       output.collect(key, outNode);
     }
 
-    public void close()
-      throws IOException {
+    public void close() throws IOException {
     }
   }
 
@@ -574,12 +576,13 @@
    * by default 10. And finally replaces the NodeDb in the WebGraph with the
    * link rank output.
    * 
-   * @param webGraphDb The WebGraph to run link analysis on.
+   * @param webGraphDb
+   *          The WebGraph to run link analysis on.
    * 
-   * @throws IOException If an error occurs during link analysis.
+   * @throws IOException
+   *           If an error occurs during link analysis.
    */
-  public void analyze(Path webGraphDb)
-    throws IOException {
+  public void analyze(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -609,7 +612,7 @@
     // initialize all urls with a default score
     int numLinks = runCounter(fs, webGraphDb);
     runInitializer(wgNodeDb, nodeDb);
-    float rankOneScore = (1f / (float)numLinks);
+    float rankOneScore = (1f / (float) numLinks);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Analysis: Number of links: " + numLinks);
@@ -622,9 +625,10 @@
     for (int i = 0; i < numIterations; i++) {
 
       // the input to inverting is always the previous output from analysis
-      LOG.info("Analysis: Starting iteration " + (i + 1) + " of " + numIterations);
+      LOG.info("Analysis: Starting iteration " + (i + 1) + " of "
+          + numIterations);
       Path tempRank = new Path(linkRank + "-"
-        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+          + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
       fs.mkdirs(tempRank);
       Path tempInverted = new Path(tempRank, "inverted");
       Path tempNodeDb = new Path(tempRank, WebGraph.NODE_DIR);
@@ -632,13 +636,13 @@
       // run invert and analysis
       runInverter(nodeDb, wgOutlinkDb, loopDb, tempInverted);
       runAnalysis(nodeDb, tempInverted, tempNodeDb, i, numIterations,
-        rankOneScore);
+          rankOneScore);
 
       // replace the temporary NodeDb with the output from analysis
       LOG.info("Analysis: Installing new link scores");
       FSUtils.replace(fs, linkRank, tempRank, true);
       LOG.info("Analysis: finished iteration " + (i + 1) + " of "
-        + numIterations);
+          + numIterations);
     }
 
     // replace the NodeDb in the WebGraph with the final output of analysis
@@ -648,11 +652,11 @@
     // remove the temporary link rank folder
     fs.delete(linkRank, true);
     long end = System.currentTimeMillis();
-    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkRank(), args);
     System.exit(res);
   }
@@ -660,14 +664,13 @@
   /**
    * Runs the LinkRank tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webgraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph db to use").create("webgraphdb");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webgraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph db to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(webgraphOpts);
 
@@ -685,8 +688,7 @@
 
       analyze(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("LinkAnalysis: " + StringUtils.stringifyException(e));
       return -2;
     }
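
The score update reformatted in Analyzer.reduce() above is the damped formula linkRankScore = (1 - dampingFactor) + dampingFactor * totalInlinkScore, with dampingFactor defaulting to 0.85 (link.analyze.damping.factor) and rankOneScore = 1 / numLinks computed in analyze() to give pages with zero inlinks a starting score. A toy, self-contained illustration of one node's update; the inlink contributions are made-up numbers.

public class LinkRankFormulaExample {
  public static void main(String[] args) {
    // Toy illustration of the update in Analyzer.reduce():
    //   linkRankScore = (1 - dampingFactor) + dampingFactor * totalInlinkScore
    float dampingFactor = 0.85f;                  // link.analyze.damping.factor default
    float[] inlinkContributions = { 0.4f, 0.1f }; // scores carried by inverted LinkDatums
    float totalInlinkScore = 0f;
    for (float c : inlinkContributions) {
      totalInlinkScore += c;
    }
    float linkRankScore = (1 - dampingFactor) + dampingFactor * totalInlinkScore;
    System.out.println(linkRankScore);            // 0.15 + 0.85 * 0.5 = 0.575
  }
}
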
Index: src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java	(working copy)
@@ -67,27 +67,24 @@
 
 /**
  * The LinkDumper tool creates a database of node to inlink information that can
- * be read using the nested Reader class.  This allows the inlink and scoring 
- * state of a single url to be reviewed quickly to determine why a given url is 
- * ranking a certain way.  This tool is to be used with the LinkRank analysis.
+ * be read using the nested Reader class. This allows the inlink and scoring
+ * state of a single url to be reviewed quickly to determine why a given url is
+ * ranking a certain way. This tool is to be used with the LinkRank analysis.
  */
-public class LinkDumper
-  extends Configured
-  implements Tool {
+public class LinkDumper extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDumper.class);
   public static final String DUMP_DIR = "linkdump";
 
   /**
-   * Reader class which will print out the url and all of its inlinks to system 
-   * out.  Each inlinkwill be displayed with its node information including 
-   * score and number of in and outlinks.
+   * Reader class which will print out the url and all of its inlinks to system
+   * out. Each inlink will be displayed with its node information including
+   * score and number of in and outlinks.
    */
   public static class Reader {
 
-    public static void main(String[] args)
-      throws Exception {
-      
+    public static void main(String[] args) throws Exception {
+
       if (args == null || args.length < 2) {
         System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
         return;
@@ -99,20 +96,20 @@
       Path webGraphDb = new Path(args[0]);
       String url = args[1];
       MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
-        webGraphDb, DUMP_DIR), conf);
+          webGraphDb, DUMP_DIR), conf);
 
       // get the link nodes for the url
       Text key = new Text(url);
       LinkNodes nodes = new LinkNodes();
       MapFileOutputFormat.getEntry(readers,
-        new HashPartitioner<Text, LinkNodes>(), key, nodes);
+          new HashPartitioner<Text, LinkNodes>(), key, nodes);
 
       // print out the link nodes
       LinkNode[] linkNodesAr = nodes.getLinks();
       System.out.println(url + ":");
       for (LinkNode node : linkNodesAr) {
         System.out.println("  " + node.getUrl() + " - "
-          + node.getNode().toString());
+            + node.getNode().toString());
       }
 
       // close the readers
@@ -123,8 +120,7 @@
   /**
    * Bean class which holds url to node information.
    */
-  public static class LinkNode
-    implements Writable {
+  public static class LinkNode implements Writable {
 
     private String url = null;
     private Node node = null;
@@ -154,15 +150,13 @@
       this.node = node;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
       url = in.readUTF();
       node = new Node();
       node.readFields(in);
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       out.writeUTF(url);
       node.write(out);
     }
@@ -172,8 +166,7 @@
   /**
    * Writable class which holds an array of LinkNode objects.
    */
-  public static class LinkNodes
-    implements Writable {
+  public static class LinkNodes implements Writable {
 
     private LinkNode[] links;
 
@@ -193,8 +186,7 @@
       this.links = links;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
       int numLinks = in.readInt();
       if (numLinks > 0) {
         links = new LinkNode[numLinks];
@@ -206,8 +198,7 @@
       }
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       if (links != null && links.length > 0) {
         int numLinks = links.length;
         out.writeInt(numLinks);
@@ -222,9 +213,9 @@
    * Inverts outlinks from the WebGraph to inlinks and attaches node
    * information.
    */
-  public static class Inverter
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, LinkNode> {
+  public static class Inverter implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, LinkNode> {
 
     private JobConf conf;
 
@@ -236,8 +227,8 @@
      * Wraps all values in ObjectWritables.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -245,12 +236,12 @@
     }
 
     /**
-     * Inverts outlinks to inlinks while attaching node information to the 
+     * Inverts outlinks to inlinks while attaching node information to the
      * outlink.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, LinkNode> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkNode> output, Reporter reporter)
+        throws IOException {
 
       String fromUrl = key.toString();
       List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
@@ -262,14 +253,12 @@
         ObjectWritable write = values.next();
         Object obj = write.get();
         if (obj instanceof Node) {
-          node = (Node)obj;
+          node = (Node) obj;
+        } else if (obj instanceof LinkDatum) {
+          outlinks.add((LinkDatum) WritableUtils.clone((LinkDatum) obj, conf));
+        } else if (obj instanceof LoopSet) {
+          loops = (LoopSet) obj;
         }
-        else if (obj instanceof LinkDatum) {
-          outlinks.add((LinkDatum)WritableUtils.clone((LinkDatum)obj, conf));
-        }
-        else if (obj instanceof LoopSet) {
-          loops = (LoopSet)obj;
-        }
       }
 
       // only collect if there are outlinks
@@ -280,13 +269,13 @@
         for (int i = 0; i < outlinks.size(); i++) {
           LinkDatum outlink = outlinks.get(i);
           String toUrl = outlink.getUrl();
-          
+
           // remove any url that is in the loopset, same as LinkRank
           if (loopSet != null && loopSet.contains(toUrl)) {
             continue;
           }
-          
-          // collect the outlink as an inlink with the node 
+
+          // collect the outlink as an inlink with the node
           output.collect(new Text(toUrl), new LinkNode(fromUrl, node));
         }
       }
@@ -297,11 +286,11 @@
   }
 
   /**
-   * Merges LinkNode objects into a single array value per url.  This allows 
-   * all values to be quickly retrieved and printed via the Reader tool.
+   * Merges LinkNode objects into a single array value per url. This allows all
+   * values to be quickly retrieved and printed via the Reader tool.
    */
-  public static class Merger
-    implements Reducer<Text, LinkNode, Text, LinkNodes> {
+  public static class Merger implements
+      Reducer<Text, LinkNode, Text, LinkNodes> {
 
     private JobConf conf;
     private int maxInlinks = 50000;
@@ -314,8 +303,8 @@
      * Aggregate all LinkNode objects for a given url.
      */
     public void reduce(Text key, Iterator<LinkNode> values,
-      OutputCollector<Text, LinkNodes> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkNodes> output, Reporter reporter)
+        throws IOException {
 
       List<LinkNode> nodeList = new ArrayList<LinkNode>();
       int numNodes = 0;
@@ -323,10 +312,9 @@
       while (values.hasNext()) {
         LinkNode cur = values.next();
         if (numNodes < maxInlinks) {
-          nodeList.add((LinkNode)WritableUtils.clone(cur, conf));
+          nodeList.add((LinkNode) WritableUtils.clone(cur, conf));
           numNodes++;
-        }
-        else {
+        } else {
           break;
         }
       }
@@ -342,11 +330,10 @@
   }
 
   /**
-   * Runs the inverter and merger jobs of the LinkDumper tool to create the 
-   * url to inlink node database.
+   * Runs the inverter and merger jobs of the LinkDumper tool to create the url
+   * to inlink node database.
    */
-  public void dumpLinks(Path webGraphDb)
-    throws IOException {
+  public void dumpLinks(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -362,7 +349,7 @@
 
     // run the inverter job
     Path tempInverted = new Path(webGraphDb, "inverted-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
     JobConf inverter = new NutchJob(conf);
     inverter.setJobName("LinkDumper: inverter");
     FileInputFormat.addInputPath(inverter, nodeDb);
@@ -384,8 +371,7 @@
       LOG.info("LinkDumper: running inverter");
       JobClient.runJob(inverter);
       LOG.info("LinkDumper: finished inverter");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -407,36 +393,34 @@
       LOG.info("LinkDumper: running merger");
       JobClient.runJob(merger);
       LOG.info("LinkDumper: finished merger");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
 
     fs.delete(tempInverted, true);
     long end = System.currentTimeMillis();
-    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkDumper(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
-   * Runs the LinkDumper tool.  This simply creates the database, to read the
+   * Runs the LinkDumper tool. This simply creates the database, to read the
    * values the nested Reader tool must be used.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
     Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
-      .withDescription("the web graph database to use").create("webgraphdb");
+        .withDescription("the web graph database to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     CommandLineParser parser = new GnuParser();
@@ -452,8 +436,7 @@
       String webGraphDb = line.getOptionValue("webgraphdb");
       dumpLinks(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("LinkDumper: " + StringUtils.stringifyException(e));
       return -2;
     }
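
A hypothetical end-to-end sketch of the LinkDumper flow described above: run the tool to build the linkdump database under the webgraphdb, then use the nested Reader to print one url's inlink nodes. Paths and url are placeholders.

import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.LinkDumper;
import org.apache.nutch.util.NutchConfiguration;

public class LinkDumperExample {
  public static void main(String[] args) throws Exception {
    // 1) Build the linkdump database (inverter + merger jobs).
    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDumper(),
        new String[] { "-webgraphdb", "crawl/webgraphdb" });
    // 2) Print the stored inlink nodes for a single url.
    if (res == 0) {
      LinkDumper.Reader.main(new String[] { "crawl/webgraphdb",
          "http://www.example.com/" });
    }
  }
}
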
Index: src/java/org/apache/nutch/scoring/webgraph/LoopReader.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/LoopReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/LoopReader.java	(working copy)
@@ -44,37 +44,39 @@
 
   private FileSystem fs;
   private MapFile.Reader[] loopReaders;
-  
+
   public LoopReader() {
-    
+
   }
-  
+
   public LoopReader(Configuration conf) {
     super(conf);
   }
 
   /**
-   * Prints loopset for a single url.  The loopset information will show any
+   * Prints loopset for a single url. The loopset information will show any
    * outlink url that eventually forms a link cycle.
    * 
-   * @param webGraphDb The WebGraph to check for loops
-   * @param url The url to check.
+   * @param webGraphDb
+   *          The WebGraph to check for loops
+   * @param url
+   *          The url to check.
    * 
-   * @throws IOException If an error occurs while printing loopset information.
+   * @throws IOException
+   *           If an error occurs while printing loopset information.
    */
-  public void dumpUrl(Path webGraphDb, String url)
-    throws IOException {
+  public void dumpUrl(Path webGraphDb, String url) throws IOException {
 
     // open the readers
     fs = FileSystem.get(getConf());
     loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
-      Loops.LOOPS_DIR), getConf());
+        Loops.LOOPS_DIR), getConf());
 
     // get the loopset for a given url, if any
     Text key = new Text(url);
     LoopSet loop = new LoopSet();
     MapFileOutputFormat.getEntry(loopReaders,
-      new HashPartitioner<Text, LoopSet>(), key, loop);
+        new HashPartitioner<Text, LoopSet>(), key, loop);
 
     // print out each loop url in the set
     System.out.println(url + ":");
@@ -87,19 +89,18 @@
   }
 
   /**
-   * Runs the LoopReader tool.  For this tool to work the loops job must have
+   * Runs the LoopReader tool. For this tool to work the loops job must have
    * already been run on the corresponding WebGraph.
    */
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
     Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
-      .withDescription("the webgraphdb to use").create("webgraphdb");
+        .withDescription("the webgraphdb to use").create("webgraphdb");
     Option urlOpts = OptionBuilder.withArgName("url").hasOptionalArg()
-      .withDescription("the url to dump").create("url");
+        .withDescription("the url to dump").create("url");
     options.addOption(helpOpts);
     options.addOption(webGraphOpts);
     options.addOption(urlOpts);
@@ -109,7 +110,7 @@
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("url")) {
+          || !line.hasOption("url")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraphReader", options);
         return;
@@ -120,8 +121,7 @@
       LoopReader reader = new LoopReader(NutchConfiguration.create());
       reader.dumpUrl(new Path(webGraphDb), url);
       return;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       e.printStackTrace();
       return;
     }
Index: src/java/org/apache/nutch/scoring/webgraph/Loops.java
===================================================================
--- src/java/org/apache/nutch/scoring/webgraph/Loops.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/webgraph/Loops.java	(working copy)
@@ -76,9 +76,7 @@
  * rather small. Because of this the Loops job is optional and if it doesn't
  * exist then it won't be factored into the LinkRank program.
  */
-public class Loops
-  extends Configured
-  implements Tool {
+public class Loops extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(Loops.class);
   public static final String LOOPS_DIR = "loops";
@@ -87,8 +85,7 @@
   /**
    * A link path or route looking to identify a link cycle.
    */
-  public static class Route
-    implements Writable {
+  public static class Route implements Writable {
 
     private String outlinkUrl = null;
     private String lookingFor = null;
@@ -122,16 +119,14 @@
       this.found = found;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
 
       outlinkUrl = Text.readString(in);
       lookingFor = Text.readString(in);
       found = in.readBoolean();
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       Text.writeString(out, outlinkUrl);
       Text.writeString(out, lookingFor);
       out.writeBoolean(found);
@@ -141,8 +136,7 @@
   /**
    * A set of loops.
    */
-  public static class LoopSet
-    implements Writable {
+  public static class LoopSet implements Writable {
 
     private Set<String> loopSet = new HashSet<String>();
 
@@ -158,8 +152,7 @@
       this.loopSet = loopSet;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
 
       int numNodes = in.readInt();
       loopSet = new HashSet<String>();
@@ -169,8 +162,7 @@
       }
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
 
       int numNodes = (loopSet != null ? loopSet.size() : 0);
       out.writeInt(numNodes);
@@ -191,10 +183,9 @@
   /**
    * Initializes the Loop routes.
    */
-  public static class Initializer
-    extends Configured
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Route> {
+  public static class Initializer extends Configured implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Route> {
 
     private JobConf conf;
 
@@ -222,8 +213,8 @@
      * Wraps values in ObjectWritable.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -236,8 +227,8 @@
      * the Looper job.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Route> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Route> output, Reporter reporter)
+        throws IOException {
 
       String url = key.toString();
       Node node = null;
@@ -248,11 +239,10 @@
         ObjectWritable objWrite = values.next();
         Object obj = objWrite.get();
         if (obj instanceof LinkDatum) {
-          outlinkList.add((LinkDatum)obj);
+          outlinkList.add((LinkDatum) obj);
+        } else if (obj instanceof Node) {
+          node = (Node) obj;
         }
-        else if (obj instanceof Node) {
-          node = (Node)obj;
-        }
       }
 
       // has to have inlinks otherwise cycle not possible
@@ -282,10 +272,9 @@
    * Follows a route path looking for the start url of the route. If the start
    * url is found then the route is a cyclical path.
    */
-  public static class Looper
-    extends Configured
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Route> {
+  public static class Looper extends Configured implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Route> {
 
     private JobConf conf;
     private boolean last = false;
@@ -315,15 +304,14 @@
      * Wrap values in ObjectWritable.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       Writable cloned = null;
       if (value instanceof LinkDatum) {
-        cloned = new Text(((LinkDatum)value).getUrl());
-      }
-      else {
+        cloned = new Text(((LinkDatum) value).getUrl());
+      } else {
         cloned = WritableUtils.clone(value, conf);
       }
       objWrite.set(cloned);
@@ -336,8 +324,8 @@
      * passes.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Route> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Route> output, Reporter reporter)
+        throws IOException {
 
       List<Route> routeList = new ArrayList<Route>();
       Set<String> outlinkUrls = new LinkedHashSet<String>();
@@ -348,10 +336,9 @@
         ObjectWritable next = values.next();
         Object value = next.get();
         if (value instanceof Route) {
-          routeList.add((Route)WritableUtils.clone((Route)value, conf));
-        }
-        else if (value instanceof Text) {
-          String outlinkUrl = ((Text)value).toString();
+          routeList.add((Route) WritableUtils.clone((Route) value, conf));
+        } else if (value instanceof Text) {
+          String outlinkUrl = ((Text) value).toString();
           if (!outlinkUrls.contains(outlinkUrl)) {
             outlinkUrls.add(outlinkUrl);
           }
@@ -375,16 +362,14 @@
         routeIt.remove();
         if (route.isFound()) {
           output.collect(key, route);
-        }
-        else {
+        } else {
 
           // if the route start url is found, set route to found and collect
           String lookingFor = route.getLookingFor();
           if (outlinkUrls.contains(lookingFor)) {
             route.setFound(true);
             output.collect(key, route);
-          }
-          else if (!last) {
+          } else if (!last) {
 
             // setup for next pass through the loop
             for (String outlink : outlinkUrls) {
@@ -402,10 +387,8 @@
   /**
    * Finishes the Loops job by aggregating and collecting any found routes.
    */
-  public static class Finalizer
-    extends Configured
-    implements Mapper<Text, Route, Text, Route>,
-    Reducer<Text, Route, Text, LoopSet> {
+  public static class Finalizer extends Configured implements
+      Mapper<Text, Route, Text, Route>, Reducer<Text, Route, Text, LoopSet> {
 
     private JobConf conf;
 
@@ -433,8 +416,7 @@
      * Maps out any found routes; those will be the link cycles.
      */
     public void map(Text key, Route value, OutputCollector<Text, Route> output,
-      Reporter reporter)
-      throws IOException {
+        Reporter reporter) throws IOException {
 
       if (value.isFound()) {
         String lookingFor = value.getLookingFor();
@@ -443,12 +425,12 @@
     }
 
     /**
-     * Aggregates all found routes for a given start url into a loopset and 
+     * Aggregates all found routes for a given start url into a loopset and
      * collects the loopset.
      */
     public void reduce(Text key, Iterator<Route> values,
-      OutputCollector<Text, LoopSet> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LoopSet> output, Reporter reporter)
+        throws IOException {
 
       LoopSet loops = new LoopSet();
       while (values.hasNext()) {
@@ -465,8 +447,7 @@
   /**
    * Runs the various loop jobs.
    */
-  public void findLoops(Path webGraphDb)
-    throws IOException {
+  public void findLoops(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -481,7 +462,7 @@
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Path routes = new Path(webGraphDb, ROUTES_DIR);
     Path tempRoute = new Path(webGraphDb, ROUTES_DIR + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     // run the initializer
     JobConf init = new NutchJob(conf);
@@ -504,8 +485,7 @@
       LOG.info("Loops: installing initializer " + routes);
       FSUtils.replace(fs, routes, tempRoute, true);
       LOG.info("Loops: finished initializer");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -536,8 +516,7 @@
         LOG.info("Loops: installing looper " + routes);
         FSUtils.replace(fs, routes, tempRoute, true);
         LOG.info("Loops: finished looper");
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         LOG.error(StringUtils.stringifyException(e));
         throw e;
       }
@@ -561,17 +540,16 @@
       LOG.info("Loops: starting finalizer");
       JobClient.runJob(finalizer);
       LOG.info("Loops: finished finalizer");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
     long end = System.currentTimeMillis();
-    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Loops(), args);
     System.exit(res);
   }
@@ -579,14 +557,13 @@
   /**
    * Runs the Loops tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
-    Option helpOpts = OptionBuilder.withArgName("help").withDescription(
-      "show this help message").create("help");
-    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
-      "the web graph database to use").create("webgraphdb");
+    Option helpOpts = OptionBuilder.withArgName("help")
+        .withDescription("show this help message").create("help");
+    Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
+        .withDescription("the web graph database to use").create("webgraphdb");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
 
@@ -603,8 +580,7 @@
       String webGraphDb = line.getOptionValue("webgraphdb");
       findLoops(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("Loops: " + StringUtils.stringifyException(e));
       return -2;
     }
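
Similarly, a hypothetical sketch tying Loops to the LoopReader reformatted earlier: populate the optional loops database, then dump the loopset for one url. Paths and url are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.LoopReader;
import org.apache.nutch.scoring.webgraph.Loops;
import org.apache.nutch.util.NutchConfiguration;

public class LoopsExample {
  public static void main(String[] args) throws Exception {
    // Run the initializer/looper/finalizer jobs to find link cycles.
    int res = ToolRunner.run(NutchConfiguration.create(), new Loops(),
        new String[] { "-webgraphdb", "crawl/webgraphdb" });
    // Then print any loopset recorded for one url.
    if (res == 0) {
      LoopReader reader = new LoopReader(NutchConfiguration.create());
      reader.dumpUrl(new Path("crawl/webgraphdb"), "http://www.example.com/");
    }
  }
}
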
Index: src/java/org/apache/nutch/scoring/ScoringFilterException.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilterException.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/ScoringFilterException.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.scoring;
 
 /**
Index: src/java/org/apache/nutch/scoring/ScoringFilter.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/ScoringFilter.java	(working copy)
@@ -33,129 +33,181 @@
 /**
  * A contract defining behavior of scoring plugins.
  * 
- * A scoring filter will manipulate scoring variables in CrawlDatum and
- * in resulting search indexes. Filters can be chained in a specific order,
- * to provide multi-stage scoring adjustments.
+ * A scoring filter will manipulate scoring variables in CrawlDatum and in
+ * resulting search indexes. Filters can be chained in a specific order, to
+ * provide multi-stage scoring adjustments.
  * 
  * @author Andrzej Bialecki
  */
 public interface ScoringFilter extends Configurable, Pluggable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = ScoringFilter.class.getName();
-  
+
   /**
    * Set an initial score for newly injected pages. Note: newly injected pages
-   * may have no inlinks, so filter implementations may wish to set this 
-   * score to a non-zero value, to give newly injected pages some initial
-   * credit.
-   * @param url url of the page
-   * @param datum new datum. Filters will modify it in-place.
+   * may have no inlinks, so filter implementations may wish to set this score
+   * to a non-zero value, to give newly injected pages some initial credit.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          new datum. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException;
-  
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException;
+
   /**
-   * Set an initial score for newly discovered pages. Note: newly discovered pages
-   * have at least one inlink with its score contribution, so filter implementations
-   * may choose to set initial score to zero (unknown value), and then the inlink
-   * score contribution will set the "real" value of the new page.
-   * @param url url of the page
-   * @param datum new datum. Filters will modify it in-place.
+   * Set an initial score for newly discovered pages. Note: newly discovered
+   * pages have at least one inlink with its score contribution, so filter
+   * implementations may choose to set initial score to zero (unknown value),
+   * and then the inlink score contribution will set the "real" value of the new
+   * page.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          new datum. Filters will modify it in-place.
    * @throws ScoringFilterException
    */
-  public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException;
-  
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException;
+
   /**
-   * This method prepares a sort value for the purpose of sorting and
-   * selecting top N scoring pages during fetchlist generation.
-   * @param url url of the page
-   * @param datum page's datum, should not be modified
-   * @param initSort initial sort value, or a value from previous filters in chain
+   * This method prepares a sort value for the purpose of sorting and selecting
+   * top N scoring pages during fetchlist generation.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          page's datum, should not be modified
+   * @param initSort
+   *          initial sort value, or a value from previous filters in chain
    */
-  public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException;
-  
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException;
+
   /**
    * This method takes all relevant score information from the current datum
    * (coming from a generated fetchlist) and stores it into
-   * {@link org.apache.nutch.protocol.Content} metadata.
-   * This is needed in order to pass this value(s) to the mechanism that distributes it
-   * to outlinked pages.
-   * @param url url of the page
-   * @param datum source datum. NOTE: modifications to this value are not persisted.
-   * @param content instance of content. Implementations may modify this
-   * in-place, primarily by setting some metadata properties.
+   * {@link org.apache.nutch.protocol.Content} metadata. This is needed in order
+   * to pass this value(s) to the mechanism that distributes it to outlinked
+   * pages.
+   * 
+   * @param url
+   *          url of the page
+   * @param datum
+   *          source datum. NOTE: modifications to this value are not persisted.
+   * @param content
+   *          instance of content. Implementations may modify this in-place,
+   *          primarily by setting some metadata properties.
    */
-  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException;
-  
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException;
+
   /**
    * Currently a part of score distribution is performed using only data coming
    * from the parsing process. We need this method in order to ensure the
    * presence of score data in these steps.
-   * @param url page url
-   * @param content original content. NOTE: modifications to this value are not persisted.
-   * @param parse target instance to copy the score information to. Implementations
-   * may modify this in-place, primarily by setting some metadata properties.
+   * 
+   * @param url
+   *          page url
+   * @param content
+   *          original content. NOTE: modifications to this value are not
+   *          persisted.
+   * @param parse
+   *          target instance to copy the score information to. Implementations
+   *          may modify this in-place, primarily by setting some metadata
+   *          properties.
    */
-  public void passScoreAfterParsing(Text url, Content content, Parse parse) throws ScoringFilterException;
-  
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException;
+
   /**
    * Distribute score value from the current page to all its outlinked pages.
-   * @param fromUrl url of the source page
-   * @param parseData ParseData instance, which stores relevant score value(s)
-   * in its metadata. NOTE: filters may modify this in-place, all changes will
-   * be persisted.
-   * @param targets &lt;url, CrawlDatum&gt; pairs. NOTE: filters can modify this in-place,
-   * all changes will be persisted.
-   * @param adjust a CrawlDatum instance, initially null, which implementations
-   * may use to pass adjustment values to the original CrawlDatum. When creating
-   * this instance, set its status to {@link CrawlDatum#STATUS_LINKED}.
-   * @param allCount number of all collected outlinks from the source page
+   * 
+   * @param fromUrl
+   *          url of the source page
+   * @param parseData
+   *          ParseData instance, which stores relevant score value(s) in its
+   *          metadata. NOTE: filters may modify this in-place, all changes will
+   *          be persisted.
+   * @param targets
+   *          &lt;url, CrawlDatum&gt; pairs. NOTE: filters can modify this
+   *          in-place, all changes will be persisted.
+   * @param adjust
+   *          a CrawlDatum instance, initially null, which implementations may
+   *          use to pass adjustment values to the original CrawlDatum. When
+   *          creating this instance, set its status to
+   *          {@link CrawlDatum#STATUS_LINKED}.
+   * @param allCount
+   *          number of all collected outlinks from the source page
    * @return if needed, implementations may return an instance of CrawlDatum,
-   * with status {@link CrawlDatum#STATUS_LINKED}, which contains adjustments
-   * to be applied to the original CrawlDatum score(s) and metadata. This can
-   * be null if not needed.
+   *         with status {@link CrawlDatum#STATUS_LINKED}, which contains
+   *         adjustments to be applied to the original CrawlDatum score(s) and
+   *         metadata. This can be null if not needed.
    * @throws ScoringFilterException
    */
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, 
-          Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust,
-          int allCount) throws ScoringFilterException;
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException;
 
   /**
-   * This method calculates a new score of CrawlDatum during CrawlDb update, based on the
-   * initial value of the original CrawlDatum, and also score values contributed by
-   * inlinked pages.
-   * @param url url of the page
-   * @param old original datum, with original score. May be null if this is a newly
-   * discovered page. If not null, filters should use score values from this parameter
-   * as the starting values - the <code>datum</code> parameter may contain values that are
-   * no longer valid, if other updates occured between generation and this update.
-   * @param datum the new datum, with the original score saved at the time when
-   * fetchlist was generated. Filters should update this in-place, and it will be saved in
-   * the crawldb.
-   * @param inlinked (partial) list of CrawlDatum-s (with their scores) from
-   * links pointing to this page, found in the current update batch.
+   * This method calculates a new score of CrawlDatum during CrawlDb update,
+   * based on the initial value of the original CrawlDatum, and also score
+   * values contributed by inlinked pages.
+   * 
+   * @param url
+   *          url of the page
+   * @param old
+   *          original datum, with original score. May be null if this is a
+   *          newly discovered page. If not null, filters should use score
+   *          values from this parameter as the starting values - the
+   *          <code>datum</code> parameter may contain values that are no longer
+   *          valid, if other updates occurred between generation and this
+   *          update.
+   * @param datum
+   *          the new datum, with the original score saved at the time when
+   *          fetchlist was generated. Filters should update this in-place, and
+   *          it will be saved in the crawldb.
+   * @param inlinked
+   *          (partial) list of CrawlDatum-s (with their scores) from links
+   *          pointing to this page, found in the current update batch.
    * @throws ScoringFilterException
    */
-  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List<CrawlDatum> inlinked) throws ScoringFilterException;
-  
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException;
+
   /**
    * This method calculates a Lucene document boost.
-   * @param url url of the page
-   * @param doc Lucene document. NOTE: this already contains all information collected
-   * by indexing filters. Implementations may modify this instance, in order to store/remove
-   * some information.
-   * @param dbDatum current page from CrawlDb. NOTE: changes made to this instance
-   * are not persisted.
-   * @param fetchDatum datum from FetcherOutput (containing among others the fetching status)
-   * @param parse parsing result. NOTE: changes made to this instance are not persisted.
-   * @param inlinks current inlinks from LinkDb. NOTE: changes made to this instance are
-   * not persisted.
-   * @param initScore initial boost value for the Lucene document.
-   * @return boost value for the Lucene document. This value is passed as an argument
-   * to the next scoring filter in chain. NOTE: implementations may also express
-   * other scoring strategies by modifying Lucene document directly.
+   * 
+   * @param url
+   *          url of the page
+   * @param doc
+   *          Lucene document. NOTE: this already contains all information
+   *          collected by indexing filters. Implementations may modify this
+   *          instance, in order to store/remove some information.
+   * @param dbDatum
+   *          current page from CrawlDb. NOTE: changes made to this instance are
+   *          not persisted.
+   * @param fetchDatum
+   *          datum from FetcherOutput (containing among others the fetching
+   *          status)
+   * @param parse
+   *          parsing result. NOTE: changes made to this instance are not
+   *          persisted.
+   * @param inlinks
+   *          current inlinks from LinkDb. NOTE: changes made to this instance
+   *          are not persisted.
+   * @param initScore
+   *          initial boost value for the Lucene document.
+   * @return boost value for the Lucene document. This value is passed as an
+   *         argument to the next scoring filter in chain. NOTE: implementations
+   *         may also express other scoring strategies by modifying Lucene
+   *         document directly.
    * @throws ScoringFilterException
    */
   public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
-          CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException;
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException;
 }
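For reference, a minimal ScoringFilter sketch against the interface above. The class name and the use of the db.score.injected property are illustrative assumptions, not part of this patch; real implementations such as scoring-opic do considerably more.

import java.util.Collection;
import java.util.List;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;

/** Minimal sketch: every page keeps a flat score; inlinks simply add theirs. */
public class FlatScoringFilter implements ScoringFilter {

  private Configuration conf;
  private float injected = 1.0f;

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
    // Illustrative: read the initial credit for injected pages from configuration.
    injected = conf.getFloat("db.score.injected", 1.0f);
  }

  public void injectedScore(Text url, CrawlDatum datum)
      throws ScoringFilterException {
    datum.setScore(injected); // give newly injected pages some initial credit
  }

  public void initialScore(Text url, CrawlDatum datum)
      throws ScoringFilterException {
    datum.setScore(0.0f); // newly discovered pages start from their inlink contributions
  }

  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
      throws ScoringFilterException {
    return initSort * datum.getScore(); // sort fetchlists by current score
  }

  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
      throws ScoringFilterException {
    // this sketch carries no score metadata through parsing
  }

  public void passScoreAfterParsing(Text url, Content content, Parse parse)
      throws ScoringFilterException {
  }

  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
      CrawlDatum adjust, int allCount) throws ScoringFilterException {
    return adjust; // no adjustment to the original CrawlDatum
  }

  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
      List<CrawlDatum> inlinked) throws ScoringFilterException {
    float score = old != null ? old.getScore() : datum.getScore();
    for (CrawlDatum in : inlinked) {
      score += in.getScore(); // inlink scores accumulate onto the page
    }
    datum.setScore(score);
  }

  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
      throws ScoringFilterException {
    return initScore; // leave the document boost unchanged
  }
}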
Index: src/java/org/apache/nutch/scoring/ScoringFilters.java
===================================================================
--- src/java/org/apache/nutch/scoring/ScoringFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/scoring/ScoringFilters.java	(working copy)
@@ -51,7 +51,8 @@
     super(conf);
     ObjectCache objectCache = ObjectCache.get(conf);
     String order = conf.get("scoring.filter.order");
-    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+    this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,20 +61,23 @@
       }
 
       try {
-        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(ScoringFilter.X_POINT_ID);
-        if (point == null) throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            ScoringFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(ScoringFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, ScoringFilter> filterMap =
-          new HashMap<String, ScoringFilter>();
+        HashMap<String, ScoringFilter> filterMap = new HashMap<String, ScoringFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
-          ScoringFilter filter = (ScoringFilter) extension.getExtensionInstance();
+          ScoringFilter filter = (ScoringFilter) extension
+              .getExtensionInstance();
           if (!filterMap.containsKey(filter.getClass().getName())) {
             filterMap.put(filter.getClass().getName(), filter);
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(ScoringFilter.class.getName(), filterMap.values().toArray(new ScoringFilter[0]));
+          objectCache.setObject(ScoringFilter.class.getName(), filterMap
+              .values().toArray(new ScoringFilter[0]));
         } else {
           ScoringFilter[] filter = new ScoringFilter[orderedFilters.length];
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -84,12 +88,14 @@
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (ScoringFilter[]) objectCache.getObject(ScoringFilter.class.getName());
+      this.filters = (ScoringFilter[]) objectCache
+          .getObject(ScoringFilter.class.getName());
     }
   }
 
   /** Calculate a sort value for Generate. */
-  public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException {
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       initSort = this.filters[i].generatorSortValue(url, datum, initSort);
     }
@@ -97,48 +103,59 @@
   }
 
   /** Calculate a new initial score, used when adding newly discovered pages. */
-  public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException {
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].initialScore(url, datum);
     }
   }
 
   /** Calculate a new initial score, used when injecting new pages. */
-  public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException {
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].injectedScore(url, datum);
     }
   }
 
   /** Calculate updated page score during CrawlDb.update(). */
-  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List<CrawlDatum> inlinked) throws ScoringFilterException {
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].updateDbScore(url, old, datum, inlinked);
     }
   }
 
-  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException {
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].passScoreBeforeParsing(url, datum, content);
     }
   }
-  
-  public void passScoreAfterParsing(Text url, Content content, Parse parse) throws ScoringFilterException {
+
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].passScoreAfterParsing(url, content, parse);
     }
   }
-  
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust, int allCount) throws ScoringFilterException {
+
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
-      adjust = this.filters[i].distributeScoreToOutlinks(fromUrl, parseData, targets, adjust, allCount);
+      adjust = this.filters[i].distributeScoreToOutlinks(fromUrl, parseData,
+          targets, adjust, allCount);
     }
     return adjust;
   }
 
-  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum, CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException {
+  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
-      initScore = this.filters[i].indexerScore(url, doc, dbDatum, fetchDatum, parse, inlinks, initScore);
+      initScore = this.filters[i].indexerScore(url, doc, dbDatum, fetchDatum,
+          parse, inlinks, initScore);
     }
     return initScore;
   }
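A short usage sketch of the chained filters, assuming the scoring-opic plugin is activated through plugin.includes; the value given for scoring.filter.order is only an example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.NutchConfiguration;

public class ScoringFiltersDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    // Filters listed here run in this order; other active filters follow in arbitrary order.
    conf.set("scoring.filter.order",
        "org.apache.nutch.scoring.opic.OPICScoringFilter");
    ScoringFilters filters = new ScoringFilters(conf);
    CrawlDatum datum = new CrawlDatum();
    filters.injectedScore(new Text("http://example.com/"), datum);
    System.out.println("injected score: " + datum.getScore());
  }
}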
Index: src/java/org/apache/nutch/net/protocols/ProtocolException.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/ProtocolException.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/protocols/ProtocolException.java	(working copy)
@@ -21,12 +21,12 @@
 
 /**
  * Base exception for all protocol handlers
+ * 
  * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead.
  */
 @SuppressWarnings("serial")
 public class ProtocolException extends Exception implements Serializable {
 
-
   public ProtocolException() {
     super();
   }
Index: src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/protocols/HttpDateFormat.java	(working copy)
@@ -26,15 +26,15 @@
 
 /**
  * class to handle HTTP dates.
- *
+ * 
  * Modified from FastHttpDateFormat.java in jakarta-tomcat.
- *
+ * 
  * @author John Xing
  */
 public class HttpDateFormat {
 
-  protected static SimpleDateFormat format = 
-    new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
+  protected static SimpleDateFormat format = new SimpleDateFormat(
+      "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
 
   /**
    * HTTP date uses TimeZone GMT
@@ -43,29 +43,29 @@
     format.setTimeZone(TimeZone.getTimeZone("GMT"));
   }
 
-  //HttpDate (long t) {
-  //}
+  // HttpDate (long t) {
+  // }
 
-  //HttpDate (String s) {
-  //}
+  // HttpDate (String s) {
+  // }
 
-//  /**
-//   * Get the current date in HTTP format.
-//   */
-//  public static String getCurrentDate() {
-//
-//    long now = System.currentTimeMillis();
-//    if ((now - currentDateGenerated) > 1000) {
-//        synchronized (format) {
-//            if ((now - currentDateGenerated) > 1000) {
-//                currentDateGenerated = now;
-//                currentDate = format.format(new Date(now));
-//            }
-//        }
-//    }
-//    return currentDate;
-//
-//  }
+  // /**
+  // * Get the current date in HTTP format.
+  // */
+  // public static String getCurrentDate() {
+  //
+  // long now = System.currentTimeMillis();
+  // if ((now - currentDateGenerated) > 1000) {
+  // synchronized (format) {
+  // if ((now - currentDateGenerated) > 1000) {
+  // currentDateGenerated = now;
+  // currentDate = format.format(new Date(now));
+  // }
+  // }
+  // }
+  // return currentDate;
+  //
+  // }
 
   /**
    * Get the HTTP format of the specified date.
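The formatter wrapped by this class is a plain SimpleDateFormat; the self-contained sketch below (JDK only, nothing from this patch) shows the RFC 1123-style output it standardizes on.

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

public class HttpDateDemo {
  public static void main(String[] args) throws Exception {
    // Same pattern and time zone as the formatter above.
    SimpleDateFormat fmt = new SimpleDateFormat(
        "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
    fmt.setTimeZone(TimeZone.getTimeZone("GMT"));
    String httpDate = fmt.format(new Date(0L));
    System.out.println(httpDate);            // Thu, 01 Jan 1970 00:00:00 GMT
    System.out.println(fmt.parse(httpDate).getTime()); // 0
  }
}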
Index: src/java/org/apache/nutch/net/protocols/Response.java
===================================================================
--- src/java/org/apache/nutch/net/protocols/Response.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/protocols/Response.java	(working copy)
@@ -23,12 +23,11 @@
 import org.apache.nutch.metadata.HttpHeaders;
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
- * A response inteface.  Makes all protocols model HTTP.
+ * A response interface. Makes all protocols model HTTP.
  */
 public interface Response extends HttpHeaders {
-  
+
   /** Returns the URL used to retrieve this response. */
   public URL getUrl();
 
@@ -40,7 +39,7 @@
 
   /** Returns all the headers. */
   public Metadata getHeaders();
-  
+
   /** Returns the full content of the response. */
   public byte[] getContent();
 
Index: src/java/org/apache/nutch/net/URLNormalizer.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizer.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLNormalizer.java	(working copy)
@@ -21,13 +21,17 @@
 
 import org.apache.hadoop.conf.Configurable;
 
-/** Interface used to convert URLs to normal form and optionally perform substitutions */
+/**
+ * Interface used to convert URLs to normal form and optionally perform
+ * substitutions
+ */
 public interface URLNormalizer extends Configurable {
-  
+
   /* Extension ID */
   public static final String X_POINT_ID = URLNormalizer.class.getName();
-  
+
   /* Interface for URL normalization */
-  public String normalize(String urlString, String scope) throws MalformedURLException;
+  public String normalize(String urlString, String scope)
+      throws MalformedURLException;
 
 }
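A minimal normalizer sketch against this interface; the class name and the lower-casing behavior are illustrative only.

import java.net.MalformedURLException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLNormalizer;

public class LowercaseHostNormalizer implements URLNormalizer {
  private Configuration conf;

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  public String normalize(String urlString, String scope)
      throws MalformedURLException {
    URL u = new URL(urlString);
    // Rebuild the URL with a lower-cased protocol and host; leave path and query untouched.
    return u.getProtocol().toLowerCase() + "://" + u.getHost().toLowerCase()
        + (u.getPort() == -1 ? "" : ":" + u.getPort()) + u.getFile();
  }
}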
Index: src/java/org/apache/nutch/net/URLFilter.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLFilter.java	(working copy)
@@ -23,17 +23,18 @@
 // Nutch imports
 import org.apache.nutch.plugin.Pluggable;
 
-
 /**
- * Interface used to limit which URLs enter Nutch.
- * Used by the injector and the db updater.
+ * Interface used to limit which URLs enter Nutch. Used by the injector and the
+ * db updater.
  */
 
 public interface URLFilter extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = URLFilter.class.getName();
 
-  /* Interface for a filter that transforms a URL: it can pass the
-     original URL through or "delete" the URL by returning null */
+  /*
+   * Interface for a filter that transforms a URL: it can pass the original URL
+   * through or "delete" the URL by returning null
+   */
   public String filter(String urlString);
 }
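A minimal filter sketch against this interface; the class name and the http/https rule are illustrative only. Returning null "deletes" the URL, returning the string passes it through.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLFilter;

public class HttpOnlyURLFilter implements URLFilter {
  private Configuration conf;

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /** Pass http/https URLs through unchanged; drop everything else. */
  public String filter(String urlString) {
    if (urlString == null) {
      return null;
    }
    String lower = urlString.toLowerCase();
    return (lower.startsWith("http://") || lower.startsWith("https://"))
        ? urlString : null;
  }
}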
Index: src/java/org/apache/nutch/net/URLNormalizers.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizers.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLNormalizers.java	(working copy)
@@ -43,47 +43,63 @@
  * contexts where they are used (note however that they need to be activated
  * first through <tt>plugin.include</tt> property).
  * 
- * <p>There is one global scope defined by default, which consists of all
- * active normalizers. The order in which these normalizers
- * are executed may be defined in "urlnormalizer.order" property, which lists
- * space-separated implementation classes (if this property is missing normalizers
- * will be run in random order). If there are more
- * normalizers activated than explicitly named on this list, the remaining ones
- * will be run in random order after the ones specified on the list are executed.</p>
- * <p>You can define a set of contexts (or scopes) in which normalizers may be
+ * <p>
+ * There is one global scope defined by default, which consists of all active
+ * normalizers. The order in which these normalizers are executed may be defined
+ * in "urlnormalizer.order" property, which lists space-separated implementation
+ * classes (if this property is missing normalizers will be run in random
+ * order). If there are more normalizers activated than explicitly named on this
+ * list, the remaining ones will be run in random order after the ones specified
+ * on the list are executed.
+ * </p>
+ * <p>
+ * You can define a set of contexts (or scopes) in which normalizers may be
  * called. Each scope can have its own list of normalizers (defined in
  * "urlnormalizer.scope.<scope_name>" property) and its own order (defined in
  * "urlnormalizer.order.<scope_name>" property). If any of these properties are
- * missing, default settings are used for the global scope.</p>
- * <p>In case no normalizers are required for any given scope, a
- * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should be used.</p>
- * <p>Each normalizer may further select among many configurations, depending on
- * the scope in which it is called, because the scope name is passed as a parameter
- * to each normalizer. You can also use the same normalizer for many scopes.</p>
- * <p>Several scopes have been defined, and various Nutch tools will attempt using
- * scope-specific normalizers first (and fall back to default config if scope-specific
- * configuration is missing).</p>
- * <p>Normalizers may be run several times, to ensure that modifications introduced
+ * missing, default settings are used for the global scope.
+ * </p>
+ * <p>
+ * In case no normalizers are required for any given scope, a
+ * <code>org.apache.nutch.net.urlnormalizer.pass.PassURLNormalizer</code> should
+ * be used.
+ * </p>
+ * <p>
+ * Each normalizer may further select among many configurations, depending on
+ * the scope in which it is called, because the scope name is passed as a
+ * parameter to each normalizer. You can also use the same normalizer for many
+ * scopes.
+ * </p>
+ * <p>
+ * Several scopes have been defined, and various Nutch tools will attempt using
+ * scope-specific normalizers first (and fall back to default config if
+ * scope-specific configuration is missing).
+ * </p>
+ * <p>
+ * Normalizers may be run several times, to ensure that modifications introduced
  * by normalizers at the end of the list can be further reduced by normalizers
- * executed at the beginning. By default this loop is executed just once - if you want
- * to ensure that all possible combinations have been applied you may want to run
- * this loop up to the number of activated normalizers. This loop count can be configured
- * through <tt>urlnormalizer.loop.count</tt> property. As soon as the url is
- * unchanged the loop will stop and return the result.</p>
+ * executed at the beginning. By default this loop is executed just once - if
+ * you want to ensure that all possible combinations have been applied you may
+ * want to run this loop up to the number of activated normalizers. This loop
+ * count can be configured through <tt>urlnormalizer.loop.count</tt> property.
+ * As soon as the url is unchanged the loop will stop and return the result.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
 public final class URLNormalizers {
-  
-  /** Default scope. If no scope properties are defined then the configuration for
-   * this scope will be used.
+
+  /**
+   * Default scope. If no scope properties are defined then the configuration
+   * for this scope will be used.
    */
   public static final String SCOPE_DEFAULT = "default";
   /** Scope used by {@link org.apache.nutch.crawl.URLPartitioner}. */
   public static final String SCOPE_PARTITION = "partition";
   /** Scope used by {@link org.apache.nutch.crawl.Generator}. */
   public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count";
-  /** Scope used by {@link org.apache.nutch.fetcher.Fetcher} when processing
+  /**
+   * Scope used by {@link org.apache.nutch.fetcher.Fetcher} when processing
    * redirect URLs.
    */
   public static final String SCOPE_FETCHER = "fetcher";
@@ -93,15 +109,18 @@
   public static final String SCOPE_LINKDB = "linkdb";
   /** Scope used by {@link org.apache.nutch.crawl.Injector}. */
   public static final String SCOPE_INJECT = "inject";
-  /** Scope used when constructing new {@link org.apache.nutch.parse.Outlink} instances. */
+  /**
+   * Scope used when constructing new {@link org.apache.nutch.parse.Outlink}
+   * instances.
+   */
   public static final String SCOPE_OUTLINK = "outlink";
-  
 
-  public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(URLNormalizers.class);
 
   /* Empty extension list for caching purposes. */
   private final List<Extension> EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
-  
+
   private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];
 
   private Configuration conf;
@@ -109,37 +128,39 @@
   private ExtensionPoint extensionPoint;
 
   private URLNormalizer[] normalizers;
-  
+
   private int loopCount;
 
   public URLNormalizers(Configuration conf, String scope) {
     this.conf = conf;
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
-            URLNormalizer.X_POINT_ID);
+        URLNormalizer.X_POINT_ID);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (this.extensionPoint == null) {
       throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID
-              + " not found.");
+          + " not found.");
     }
 
-    normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
+    normalizers = (URLNormalizer[]) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_" + scope);
     if (normalizers == null) {
       normalizers = getURLNormalizers(scope);
     }
     if (normalizers == EMPTY_NORMALIZERS) {
-      normalizers = (URLNormalizer[])objectCache.getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
+      normalizers = (URLNormalizer[]) objectCache
+          .getObject(URLNormalizer.X_POINT_ID + "_" + SCOPE_DEFAULT);
       if (normalizers == null) {
         normalizers = getURLNormalizers(SCOPE_DEFAULT);
       }
     }
-    
+
     loopCount = conf.getInt("urlnormalizer.loop.count", 1);
   }
 
   /**
-   * Function returns an array of {@link URLNormalizer}s for a given scope,
-   * with a specified order.
+   * Function returns an array of {@link URLNormalizer}s for a given scope, with
+   * a specified order.
    * 
    * @param scope
    *          The scope to return the <code>Array</code> of
@@ -151,13 +172,14 @@
   URLNormalizer[] getURLNormalizers(String scope) {
     List<Extension> extensions = getExtensions(scope);
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (extensions == EMPTY_EXTENSION_LIST) {
       return EMPTY_NORMALIZERS;
     }
-    
-    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());
 
+    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(
+        extensions.size());
+
     Iterator<Extension> it = extensions.iterator();
     while (it.hasNext()) {
       Extension ext = it.next();
@@ -174,14 +196,13 @@
       } catch (PluginRuntimeException e) {
         e.printStackTrace();
         LOG.warn("URLNormalizers:PluginRuntimeException when "
-                + "initializing url normalizer plugin "
-                + ext.getDescriptor().getPluginId()
-                + " instance in getURLNormalizers "
-                + "function: attempting to continue instantiating plugins");
+            + "initializing url normalizer plugin "
+            + ext.getDescriptor().getPluginId()
+            + " instance in getURLNormalizers "
+            + "function: attempting to continue instantiating plugins");
       }
     }
-    return normalizers.toArray(new URLNormalizer[normalizers
-            .size()]);
+    return normalizers.toArray(new URLNormalizer[normalizers.size()]);
   }
 
   /**
@@ -195,9 +216,8 @@
    */
   private List<Extension> getExtensions(String scope) {
     ObjectCache objectCache = ObjectCache.get(conf);
-    List<Extension> extensions = 
-      (List<Extension>) objectCache.getObject(URLNormalizer.X_POINT_ID + "_x_"
-                                                + scope);
+    List<Extension> extensions = (List<Extension>) objectCache
+        .getObject(URLNormalizer.X_POINT_ID + "_x_" + scope);
 
     // Just compare the reference:
     // if this is the empty list, we know we will find no extension.
@@ -208,11 +228,13 @@
     if (extensions == null) {
       extensions = findExtensions(scope);
       if (extensions != null) {
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, extensions);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            extensions);
       } else {
         // Put the empty extension list into cache
         // to remember we don't know any related extension.
-        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope, EMPTY_EXTENSION_LIST);
+        objectCache.setObject(URLNormalizer.X_POINT_ID + "_x_" + scope,
+            EMPTY_EXTENSION_LIST);
         extensions = EMPTY_EXTENSION_LIST;
       }
     }
@@ -232,7 +254,8 @@
 
     String[] orders = null;
     String orderlist = conf.get("urlnormalizer.order." + scope);
-    if (orderlist == null) orderlist = conf.get("urlnormalizer.order");
+    if (orderlist == null)
+      orderlist = conf.get("urlnormalizer.order");
     if (orderlist != null && !orderlist.trim().equals("")) {
       orders = orderlist.split("\\s+");
     }
@@ -270,13 +293,17 @@
 
   /**
    * Normalize
-   * @param urlString The URL string to normalize.
-   * @param scope The given scope.
+   * 
+   * @param urlString
+   *          The URL string to normalize.
+   * @param scope
+   *          The given scope.
    * @return A normalized String, using the given <code>scope</code>
-   * @throws MalformedURLException If the given URL string is malformed.
+   * @throws MalformedURLException
+   *           If the given URL string is malformed.
    */
   public String normalize(String urlString, String scope)
-          throws MalformedURLException {
+      throws MalformedURLException {
     // optionally loop several times, and break if no further changes
     String initialString = urlString;
     for (int k = 0; k < loopCount; k++) {
@@ -285,7 +312,8 @@
           return null;
         urlString = this.normalizers[i].normalize(urlString, scope);
       }
-      if (initialString.equals(urlString)) break;
+      if (initialString.equals(urlString))
+        break;
       initialString = urlString;
     }
     return urlString;
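A usage sketch for the class above, assuming at least one urlnormalizer plugin is activated via plugin.includes; the loop-count override simply exercises the property described in the Javadoc.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.util.NutchConfiguration;

public class NormalizeOne {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    // Run the normalizer chain twice so later normalizers can act on earlier output.
    conf.setInt("urlnormalizer.loop.count", 2);
    URLNormalizers normalizers = new URLNormalizers(conf,
        URLNormalizers.SCOPE_DEFAULT);
    System.out.println(normalizers.normalize(
        "HTTP://Example.COM//a/../b.html", URLNormalizers.SCOPE_DEFAULT));
  }
}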
Index: src/java/org/apache/nutch/net/URLNormalizerChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLNormalizerChecker.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLNormalizerChecker.java	(working copy)
@@ -36,23 +36,23 @@
   private Configuration conf;
 
   public URLNormalizerChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String normalizerName, String scope) throws Exception {
     URLNormalizer normalizer = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLNormalizer.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLNormalizer.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLNormalizer.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      normalizer = (URLNormalizer)extension.getExtensionInstance();
+      normalizer = (URLNormalizer) extension.getExtensionInstance();
       if (normalizer.getClass().getName().equals(normalizerName)) {
         break;
       } else {
@@ -61,7 +61,8 @@
     }
 
     if (normalizer == null)
-      throw new RuntimeException("URLNormalizer "+normalizerName+" not found.");
+      throw new RuntimeException("URLNormalizer " + normalizerName
+          + " not found.");
 
     System.out.println("Checking URLNormalizer " + normalizerName);
 
@@ -79,7 +80,7 @@
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
     URLNormalizers normalizers = new URLNormalizers(conf, scope);
-    while((line = in.readLine()) != null) {
+    while ((line = in.readLine()) != null) {
       String out = normalizers.normalize(line, scope);
       System.out.println(out);
     }
@@ -88,7 +89,7 @@
   public static void main(String[] args) throws Exception {
 
     String usage = "Usage: URLNormalizerChecker [-normalizer <normalizerName>] [-scope <scope>]"
-      + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
+        + "\n\tscope can be one of: default,partition,generate_host_count,fetcher,crawldb,linkdb,inject,outlink";
 
     String normalizerName = null;
     String scope = URLNormalizers.SCOPE_DEFAULT;
@@ -103,7 +104,8 @@
       }
     }
 
-    URLNormalizerChecker checker = new URLNormalizerChecker(NutchConfiguration.create());
+    URLNormalizerChecker checker = new URLNormalizerChecker(
+        NutchConfiguration.create());
     if (normalizerName != null) {
       checker.checkOne(normalizerName, scope);
     } else {
Index: src/java/org/apache/nutch/net/URLFilters.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLFilters.java	(working copy)
@@ -28,7 +28,8 @@
 import org.apache.nutch.util.ObjectCache;
 
 import org.apache.hadoop.conf.Configuration;
-/** Creates and caches {@link URLFilter} implementing plugins.*/
+
+/** Creates and caches {@link URLFilter} implementing plugins. */
 public class URLFilters {
 
   public static final String URLFILTER_ORDER = "urlfilter.order";
@@ -37,7 +38,8 @@
   public URLFilters(Configuration conf) {
     String order = conf.get(URLFILTER_ORDER);
     ObjectCache objectCache = ObjectCache.get(conf);
-    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+    this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+        .getName());
 
     if (this.filters == null) {
       String[] orderedFilters = null;
@@ -60,8 +62,8 @@
           }
         }
         if (orderedFilters == null) {
-          objectCache.setObject(URLFilter.class.getName(), filterMap.values().toArray(
-              new URLFilter[0]));
+          objectCache.setObject(URLFilter.class.getName(), filterMap.values()
+              .toArray(new URLFilter[0]));
         } else {
           ArrayList<URLFilter> filters = new ArrayList<URLFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
@@ -70,13 +72,14 @@
               filters.add(filter);
             }
           }
-          objectCache.setObject(URLFilter.class.getName(), filters
-              .toArray(new URLFilter[filters.size()]));
+          objectCache.setObject(URLFilter.class.getName(),
+              filters.toArray(new URLFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class.getName());
+      this.filters = (URLFilter[]) objectCache.getObject(URLFilter.class
+          .getName());
     }
   }
 
Index: src/java/org/apache/nutch/net/URLFilterChecker.java
===================================================================
--- src/java/org/apache/nutch/net/URLFilterChecker.java	(revision 1188252)
+++ src/java/org/apache/nutch/net/URLFilterChecker.java	(working copy)
@@ -38,23 +38,23 @@
   private Configuration conf;
 
   public URLFilterChecker(Configuration conf) {
-      this.conf = conf;
+    this.conf = conf;
   }
 
   private void checkOne(String filterName) throws Exception {
     URLFilter filter = null;
 
-    ExtensionPoint point =
-      PluginRepository.get(conf).getExtensionPoint(URLFilter.X_POINT_ID);
+    ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+        URLFilter.X_POINT_ID);
 
     if (point == null)
-      throw new RuntimeException(URLFilter.X_POINT_ID+" not found.");
+      throw new RuntimeException(URLFilter.X_POINT_ID + " not found.");
 
     Extension[] extensions = point.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      filter = (URLFilter)extension.getExtensionInstance();
+      filter = (URLFilter) extension.getExtensionInstance();
       if (filter.getClass().getName().equals(filterName)) {
         break;
       } else {
@@ -63,19 +63,19 @@
     }
 
     if (filter == null)
-      throw new RuntimeException("Filter "+filterName+" not found.");
+      throw new RuntimeException("Filter " + filterName + " not found.");
 
     // jerome : should we keep this behavior?
-    //if (LogFormatter.hasLoggedSevere())
-    //  throw new RuntimeException("Severe error encountered.");
+    // if (LogFormatter.hasLoggedSevere())
+    // throw new RuntimeException("Severe error encountered.");
 
-    System.out.println("Checking URLFilter "+filterName);
+    System.out.println("Checking URLFilter " + filterName);
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
-      String out=filter.filter(line);
-      if(out!=null) {
+    while ((line = in.readLine()) != null) {
+      String out = filter.filter(line);
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
@@ -90,10 +90,10 @@
 
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
-    while((line=in.readLine())!=null) {
+    while ((line = in.readLine()) != null) {
       URLFilters filters = new URLFilters(this.conf);
       String out = filters.filter(line);
-      if(out!=null) {
+      if (out != null) {
         System.out.print("+");
         System.out.println(out);
       } else {
Index: src/java/org/apache/nutch/crawl/Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/Signature.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Signature.java	(working copy)
@@ -24,7 +24,7 @@
 
 public abstract class Signature implements Configurable {
   protected Configuration conf;
-  
+
   public abstract byte[] calculate(Content content, Parse parse);
 
   public Configuration getConf() {
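A minimal Signature sketch: digesting the raw fetched bytes, so identical content yields identical signatures. The class name is illustrative; Nutch ships comparable implementations (e.g. MD5Signature).

import java.security.MessageDigest;

import org.apache.nutch.crawl.Signature;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.protocol.Content;

public class Md5ContentSignature extends Signature {
  /** Digest the raw fetched bytes; the parse result is ignored in this sketch. */
  public byte[] calculate(Content content, Parse parse) {
    try {
      return MessageDigest.getInstance("MD5").digest(content.getContent());
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}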
Index: src/java/org/apache/nutch/crawl/CrawlDbReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbReader.java	(working copy)
@@ -69,68 +69,76 @@
 public class CrawlDbReader implements Closeable {
 
   public static final Logger LOG = LoggerFactory.getLogger(CrawlDbReader.class);
-  
+
   public static final int STD_FORMAT = 0;
   public static final int CSV_FORMAT = 1;
-    
+
   private MapFile.Reader[] readers = null;
-  
-  private void openReaders(String crawlDb, Configuration config) throws IOException {
-    if (readers != null) return;
+
+  private void openReaders(String crawlDb, Configuration config)
+      throws IOException {
+    if (readers != null)
+      return;
     FileSystem fs = FileSystem.get(config);
     readers = MapFileOutputFormat.getReaders(fs, new Path(crawlDb,
         CrawlDb.CURRENT_NAME), config);
   }
-  
+
   private void closeReaders() {
-    if (readers == null) return;
+    if (readers == null)
+      return;
     for (int i = 0; i < readers.length; i++) {
       try {
         readers[i].close();
       } catch (Exception e) {
-        
+
       }
     }
   }
-  
-  public static class CrawlDatumCsvOutputFormat extends FileOutputFormat<Text,CrawlDatum> {
-    protected static class LineRecordWriter implements RecordWriter<Text,CrawlDatum> {
+
+  public static class CrawlDatumCsvOutputFormat extends
+      FileOutputFormat<Text, CrawlDatum> {
+    protected static class LineRecordWriter implements
+        RecordWriter<Text, CrawlDatum> {
       private DataOutputStream out;
 
       public LineRecordWriter(DataOutputStream out) {
         this.out = out;
         try {
           out.writeBytes("Url;Status code;Status name;Fetch Time;Modified Time;Retries since fetch;Retry interval;Score;Signature;Metadata\n");
-        } catch (IOException e) {}
+        } catch (IOException e) {
+        }
       }
 
-      public synchronized void write(Text key, CrawlDatum value) throws IOException {
-          out.writeByte('"');
-          out.writeBytes(key.toString());
-          out.writeByte('"');
-          out.writeByte(';');
-          out.writeBytes(Integer.toString(value.getStatus()));
-          out.writeByte(';');
-          out.writeByte('"');
-          out.writeBytes(CrawlDatum.getStatusName(value.getStatus()));
-          out.writeByte('"');
-          out.writeByte(';');
-          out.writeBytes(new Date(value.getFetchTime()).toString());
-          out.writeByte(';');
-          out.writeBytes(new Date(value.getModifiedTime()).toString());
-          out.writeByte(';');
-          out.writeBytes(Integer.toString(value.getRetriesSinceFetch()));
-          out.writeByte(';');
-          out.writeBytes(Float.toString(value.getFetchInterval()));
-          out.writeByte(';');
-          out.writeBytes(Float.toString((value.getFetchInterval() / FetchSchedule.SECONDS_PER_DAY)));
-          out.writeByte(';');
-          out.writeBytes(Float.toString(value.getScore()));
-          out.writeByte(';');
-          out.writeByte('"');
-          out.writeBytes(value.getSignature() != null ? StringUtil.toHexString(value.getSignature()): "null");
-          out.writeByte('"');
-          out.writeByte('\n');
+      public synchronized void write(Text key, CrawlDatum value)
+          throws IOException {
+        out.writeByte('"');
+        out.writeBytes(key.toString());
+        out.writeByte('"');
+        out.writeByte(';');
+        out.writeBytes(Integer.toString(value.getStatus()));
+        out.writeByte(';');
+        out.writeByte('"');
+        out.writeBytes(CrawlDatum.getStatusName(value.getStatus()));
+        out.writeByte('"');
+        out.writeByte(';');
+        out.writeBytes(new Date(value.getFetchTime()).toString());
+        out.writeByte(';');
+        out.writeBytes(new Date(value.getModifiedTime()).toString());
+        out.writeByte(';');
+        out.writeBytes(Integer.toString(value.getRetriesSinceFetch()));
+        out.writeByte(';');
+        out.writeBytes(Float.toString(value.getFetchInterval()));
+        out.writeByte(';');
+        out.writeBytes(Float.toString((value.getFetchInterval() / FetchSchedule.SECONDS_PER_DAY)));
+        out.writeByte(';');
+        out.writeBytes(Float.toString(value.getScore()));
+        out.writeByte(';');
+        out.writeByte('"');
+        out.writeBytes(value.getSignature() != null ? StringUtil
+            .toHexString(value.getSignature()) : "null");
+        out.writeByte('"');
+        out.writeByte('\n');
       }
 
       public synchronized void close(Reporter reporter) throws IOException {
@@ -138,48 +146,65 @@
       }
     }
 
-    public RecordWriter<Text,CrawlDatum> getRecordWriter(FileSystem fs, JobConf job, String name,
-        Progressable progress) throws IOException {
+    public RecordWriter<Text, CrawlDatum> getRecordWriter(FileSystem fs,
+        JobConf job, String name, Progressable progress) throws IOException {
       Path dir = FileOutputFormat.getOutputPath(job);
       DataOutputStream fileOut = fs.create(new Path(dir, name), progress);
       return new LineRecordWriter(fileOut);
-   }
+    }
   }
 
-  public static class CrawlDbStatMapper implements Mapper<Text, CrawlDatum, Text, LongWritable> {
+  public static class CrawlDbStatMapper implements
+      Mapper<Text, CrawlDatum, Text, LongWritable> {
     LongWritable COUNT_1 = new LongWritable(1);
     private boolean sort = false;
+
     public void configure(JobConf job) {
-      sort = job.getBoolean("db.reader.stats.sort", false );
+      sort = job.getBoolean("db.reader.stats.sort", false);
     }
-    public void close() {}
-    public void map(Text key, CrawlDatum value, OutputCollector<Text, LongWritable> output, Reporter reporter)
-            throws IOException {
+
+    public void close() {
+    }
+
+    public void map(Text key, CrawlDatum value,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
       output.collect(new Text("T"), COUNT_1);
       output.collect(new Text("status " + value.getStatus()), COUNT_1);
-      output.collect(new Text("retry " + value.getRetriesSinceFetch()), COUNT_1);
-      output.collect(new Text("s"), new LongWritable((long) (value.getScore() * 1000.0)));
-      if(sort){
+      output
+          .collect(new Text("retry " + value.getRetriesSinceFetch()), COUNT_1);
+      output.collect(new Text("s"), new LongWritable(
+          (long) (value.getScore() * 1000.0)));
+      if (sort) {
         URL u = new URL(key.toString());
         String host = u.getHost();
-        output.collect(new Text("status " + value.getStatus() + " " + host), COUNT_1);
+        output.collect(new Text("status " + value.getStatus() + " " + host),
+            COUNT_1);
       }
     }
   }
-  
-  public static class CrawlDbStatCombiner implements Reducer<Text, LongWritable, Text, LongWritable> {
+
+  public static class CrawlDbStatCombiner implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
     LongWritable val = new LongWritable();
-    
-    public CrawlDbStatCombiner() { }
-    public void configure(JobConf job) { }
-    public void close() {}
-    public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> output, Reporter reporter)
+
+    public CrawlDbStatCombiner() {
+    }
+
+    public void configure(JobConf job) {
+    }
+
+    public void close() {
+    }
+
+    public void reduce(Text key, Iterator<LongWritable> values,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
         throws IOException {
       val.set(0L);
-      String k = ((Text)key).toString();
+      String k = ((Text) key).toString();
       if (!k.equals("s")) {
         while (values.hasNext()) {
-          LongWritable cnt = (LongWritable)values.next();
+          LongWritable cnt = (LongWritable) values.next();
           val.set(val.get() + cnt.get());
         }
         output.collect(key, val);
@@ -188,9 +213,11 @@
         long min = Long.MAX_VALUE;
         long max = Long.MIN_VALUE;
         while (values.hasNext()) {
-          LongWritable cnt = (LongWritable)values.next();
-          if (cnt.get() < min) min = cnt.get();
-          if (cnt.get() > max) max = cnt.get();
+          LongWritable cnt = (LongWritable) values.next();
+          if (cnt.get() < min)
+            min = cnt.get();
+          if (cnt.get() > max)
+            max = cnt.get();
           total += cnt.get();
         }
         output.collect(new Text("scn"), new LongWritable(min));
@@ -200,12 +227,18 @@
     }
   }
 
-  public static class CrawlDbStatReducer implements Reducer<Text, LongWritable, Text, LongWritable> {
-    public void configure(JobConf job) {}
-    public void close() {}
-    public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> output, Reporter reporter)
-            throws IOException {
+  public static class CrawlDbStatReducer implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
+    public void configure(JobConf job) {
+    }
 
+    public void close() {
+    }
+
+    public void reduce(Text key, Iterator<LongWritable> values,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
+
       String k = ((Text) key).toString();
       if (k.equals("T")) {
         // sum all values for this key
@@ -218,28 +251,30 @@
       } else if (k.startsWith("status") || k.startsWith("retry")) {
         LongWritable cnt = new LongWritable();
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
+          LongWritable val = (LongWritable) values.next();
           cnt.set(cnt.get() + val.get());
         }
         output.collect(key, cnt);
       } else if (k.equals("scx")) {
         LongWritable cnt = new LongWritable(Long.MIN_VALUE);
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
-          if (cnt.get() < val.get()) cnt.set(val.get());
+          LongWritable val = (LongWritable) values.next();
+          if (cnt.get() < val.get())
+            cnt.set(val.get());
         }
         output.collect(key, cnt);
       } else if (k.equals("scn")) {
         LongWritable cnt = new LongWritable(Long.MAX_VALUE);
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
-          if (cnt.get() > val.get()) cnt.set(val.get());
+          LongWritable val = (LongWritable) values.next();
+          if (cnt.get() > val.get())
+            cnt.set(val.get());
         }
         output.collect(key, cnt);
       } else if (k.equals("sct")) {
         LongWritable cnt = new LongWritable();
         while (values.hasNext()) {
-          LongWritable val = (LongWritable)values.next();
+          LongWritable val = (LongWritable) values.next();
           cnt.set(cnt.get() + val.get());
         }
         output.collect(key, cnt);
@@ -247,30 +282,39 @@
     }
   }
 
-  public static class CrawlDbTopNMapper implements Mapper<Text, CrawlDatum, FloatWritable, Text> {
+  public static class CrawlDbTopNMapper implements
+      Mapper<Text, CrawlDatum, FloatWritable, Text> {
     private static final FloatWritable fw = new FloatWritable();
     private float min = 0.0f;
-    
+
     public void configure(JobConf job) {
       long lmin = job.getLong("db.reader.topn.min", 0);
       if (lmin != 0) {
-        min = (float)lmin / 1000000.0f;
+        min = (float) lmin / 1000000.0f;
       }
     }
-    public void close() {}
-    public void map(Text key, CrawlDatum value, OutputCollector<FloatWritable, Text> output, Reporter reporter)
-            throws IOException {
-      if (value.getScore() < min) return; // don't collect low-scoring records
+
+    public void close() {
+    }
+
+    public void map(Text key, CrawlDatum value,
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
+      if (value.getScore() < min)
+        return; // don't collect low-scoring records
       fw.set(-value.getScore()); // reverse sorting order
       output.collect(fw, key); // invert mapping: score -> url
     }
   }
-  
-  public static class CrawlDbTopNReducer implements Reducer<FloatWritable, Text, FloatWritable, Text> {
+
+  public static class CrawlDbTopNReducer implements
+      Reducer<FloatWritable, Text, FloatWritable, Text> {
     private long topN;
     private long count = 0L;
-    
-    public void reduce(FloatWritable key, Iterator<Text> values, OutputCollector<FloatWritable, Text> output, Reporter reporter) throws IOException {
+
+    public void reduce(FloatWritable key, Iterator<Text> values,
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
       while (values.hasNext() && count < topN) {
         key.set(-key.get());
         output.collect(key, values.next());
@@ -281,20 +325,22 @@
     public void configure(JobConf job) {
       topN = job.getLong("db.reader.topn", 100) / job.getNumReduceTasks();
     }
-    
-    public void close() {}
+
+    public void close() {
+    }
   }
 
   public void close() {
     closeReaders();
   }
-  
-  public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException {
 
+  public void processStatJob(String crawlDb, Configuration config, boolean sort)
+      throws IOException {
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb statistics start: " + crawlDb);
     }
-    
+
     Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());
 
     JobConf job = new NutchJob(config);
@@ -320,7 +366,8 @@
 
     // reading the result
     FileSystem fileSystem = FileSystem.get(config);
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config, tmpFolder);
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config,
+        tmpFolder);
 
     Text key = new Text();
     LongWritable value = new LongWritable();
@@ -333,21 +380,25 @@
         LongWritable val = stats.get(k);
         if (val == null) {
           val = new LongWritable();
-          if (k.equals("scx")) val.set(Long.MIN_VALUE);
-          if (k.equals("scn")) val.set(Long.MAX_VALUE);
+          if (k.equals("scx"))
+            val.set(Long.MIN_VALUE);
+          if (k.equals("scn"))
+            val.set(Long.MAX_VALUE);
           stats.put(k, val);
         }
         if (k.equals("scx")) {
-          if (val.get() < value.get()) val.set(value.get());
+          if (val.get() < value.get())
+            val.set(value.get());
         } else if (k.equals("scn")) {
-          if (val.get() > value.get()) val.set(value.get());          
+          if (val.get() > value.get())
+            val.set(value.get());
         } else {
           val.set(val.get() + value.get());
         }
       }
       reader.close();
     }
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("Statistics for CrawlDb: " + crawlDb);
       LongWritable totalCnt = stats.get("T");
@@ -361,31 +412,40 @@
         } else if (k.equals("scx")) {
           LOG.info("max score:\t" + (float) (val.get() / 1000.0f));
         } else if (k.equals("sct")) {
-          LOG.info("avg score:\t" + (float) ((((double)val.get()) / totalCnt.get()) / 1000.0));
+          LOG.info("avg score:\t"
+              + (float) ((((double) val.get()) / totalCnt.get()) / 1000.0));
         } else if (k.startsWith("status")) {
           String[] st = k.split(" ");
           int code = Integer.parseInt(st[1]);
-          if(st.length >2 ) LOG.info("   " + st[2] +" :\t" + val);
-          else LOG.info(st[0] +" " +code + " (" + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
-        } else LOG.info(k + ":\t" + val);
+          if (st.length > 2)
+            LOG.info("   " + st[2] + " :\t" + val);
+          else
+            LOG.info(st[0] + " " + code + " ("
+                + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
+        } else
+          LOG.info(k + ":\t" + val);
       }
     }
     // removing the tmp folder
     fileSystem.delete(tmpFolder, true);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb statistics: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("CrawlDb statistics: done");
+    }
 
   }
-  
-  public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
+
+  public CrawlDatum get(String crawlDb, String url, Configuration config)
+      throws IOException {
     Text key = new Text(url);
     CrawlDatum val = new CrawlDatum();
     openReaders(crawlDb, config);
-    CrawlDatum res = (CrawlDatum)MapFileOutputFormat.getEntry(readers,
+    CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
         new HashPartitioner<Text, CrawlDatum>(), key, val);
     return res;
   }
 
-  public void readUrl(String crawlDb, String url, Configuration config) throws IOException {
+  public void readUrl(String crawlDb, String url, Configuration config)
+      throws IOException {
     CrawlDatum res = get(crawlDb, url, config);
     System.out.println("URL: " + url);
     if (res != null) {
@@ -394,14 +454,15 @@
       System.out.println("not found");
     }
   }
-  
-  public void processDumpJob(String crawlDb, String output, Configuration config, int format) throws IOException {
 
+  public void processDumpJob(String crawlDb, String output,
+      Configuration config, int format) throws IOException {
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb dump: starting");
       LOG.info("CrawlDb db: " + crawlDb);
     }
-    
+
     Path outFolder = new Path(output);
 
     JobConf job = new NutchJob(config);
@@ -411,27 +472,31 @@
     job.setInputFormat(SequenceFileInputFormat.class);
 
     FileOutputFormat.setOutputPath(job, outFolder);
-    if(format == CSV_FORMAT) job.setOutputFormat(CrawlDatumCsvOutputFormat.class);
-    else job.setOutputFormat(TextOutputFormat.class);
+    if (format == CSV_FORMAT)
+      job.setOutputFormat(CrawlDatumCsvOutputFormat.class);
+    else
+      job.setOutputFormat(TextOutputFormat.class);
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(CrawlDatum.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb dump: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("CrawlDb dump: done");
+    }
   }
 
-  public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException {
-    
+  public void processTopNJob(String crawlDb, long topN, float min,
+      String output, Configuration config) throws IOException {
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
       LOG.info("CrawlDb db: " + crawlDb);
     }
-    
+
     Path outFolder = new Path(output);
-    Path tempDir =
-      new Path(config.get("mapred.temp.dir", ".") +
-               "/readdb-topN-temp-"+
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path tempDir = new Path(config.get("mapred.temp.dir", ".")
+        + "/readdb-topN-temp-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("topN prepare " + crawlDb);
@@ -447,8 +512,8 @@
 
     // XXX hmmm, no setFloat() in the API ... :(
     job.setLong("db.reader.topn.min", Math.round(1000000.0 * min));
-    JobClient.runJob(job); 
-    
+    JobClient.runJob(job);
+
     if (LOG.isInfoEnabled()) {
       LOG.info("CrawlDb topN: collecting topN scores.");
     }
@@ -471,7 +536,9 @@
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(config);
     fs.delete(tempDir, true);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb topN: done"); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("CrawlDb topN: done");
+    }
 
   }
 
@@ -479,16 +546,24 @@
     CrawlDbReader dbr = new CrawlDbReader();
 
     if (args.length < 1) {
-      System.err.println("Usage: CrawlDbReader <crawldb> (-stats | -dump <out_dir> | -topN <nnnn> <out_dir> [<min>] | -url <url>)");
-      System.err.println("\t<crawldb>\tdirectory name where crawldb is located");
-      System.err.println("\t-stats [-sort] \tprint overall statistics to System.out");
+      System.err
+          .println("Usage: CrawlDbReader <crawldb> (-stats | -dump <out_dir> | -topN <nnnn> <out_dir> [<min>] | -url <url>)");
+      System.err
+          .println("\t<crawldb>\tdirectory name where crawldb is located");
+      System.err
+          .println("\t-stats [-sort] \tprint overall statistics to System.out");
       System.err.println("\t\t[-sort]\tlist status sorted by host");
-      System.err.println("\t-dump <out_dir> [-format normal|csv ]\tdump the whole db to a text file in <out_dir>");
+      System.err
+          .println("\t-dump <out_dir> [-format normal|csv ]\tdump the whole db to a text file in <out_dir>");
       System.err.println("\t\t[-format csv]\tdump in Csv format");
-      System.err.println("\t\t[-format normal]\tdump in standard format (default option)");
-      System.err.println("\t-url <url>\tprint information on <url> to System.out");
-      System.err.println("\t-topN <nnnn> <out_dir> [<min>]\tdump top <nnnn> urls sorted by score to <out_dir>");
-      System.err.println("\t\t[<min>]\tskip records with scores below this value.");
+      System.err
+          .println("\t\t[-format normal]\tdump in standard format (default option)");
+      System.err
+          .println("\t-url <url>\tprint information on <url> to System.out");
+      System.err
+          .println("\t-topN <nnnn> <out_dir> [<min>]\tdump top <nnnn> urls sorted by score to <out_dir>");
+      System.err
+          .println("\t\t[<min>]\tskip records with scores below this value.");
       System.err.println("\t\t\tThis can significantly improve performance.");
       return;
     }
@@ -498,7 +573,7 @@
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-stats")) {
         boolean toSort = false;
-        if(i < args.length - 1 && "-sort".equals(args[i+1])){
+        if (i < args.length - 1 && "-sort".equals(args[i + 1])) {
           toSort = true;
           i++;
         }
@@ -506,9 +581,10 @@
       } else if (args[i].equals("-dump")) {
         param = args[++i];
         String format = "normal";
-        if(i < args.length - 1 &&  "-format".equals(args[i+1]))
-          format = args[i=i+2];
-        dbr.processDumpJob(crawlDb, param, conf, "csv".equals(format)? CSV_FORMAT : STD_FORMAT );
+        if (i < args.length - 1 && "-format".equals(args[i + 1]))
+          format = args[i = i + 2];
+        dbr.processDumpJob(crawlDb, param, conf,
+            "csv".equals(format) ? CSV_FORMAT : STD_FORMAT);
       } else if (args[i].equals("-url")) {
         param = args[++i];
         dbr.readUrl(crawlDb, param, conf);
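
For reference, a minimal sketch of driving the CrawlDbReader entry points touched above (processStatJob, processTopNJob, readUrl) programmatically rather than through the command line. This is an illustration only: the crawldb and output paths are hypothetical, and the method signatures are the ones shown in the hunks above.

// Illustrative sketch, not part of this patch. Paths are hypothetical;
// method signatures follow the hunks above.
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.CrawlDbReader;
import org.apache.nutch.util.NutchConfiguration;

public class CrawlDbReaderExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    CrawlDbReader dbr = new CrawlDbReader();
    // overall statistics, unsorted (the -stats option of the usage message)
    dbr.processStatJob("crawl/crawldb", conf, false);
    // top 1000 URLs by score, skipping records with scores below 0.1 (-topN)
    dbr.processTopNJob("crawl/crawldb", 1000L, 0.1f, "crawldb-top1000", conf);
    // details for a single URL (-url)
    dbr.readUrl("crawl/crawldb", "http://example.org/", conf);
    dbr.close();
  }
}

The calls map one-to-one onto the -stats, -topN and -url options of the usage message above.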
Index: src/java/org/apache/nutch/crawl/LinkDb.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDb.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDb.java	(working copy)
@@ -43,7 +43,8 @@
 import org.apache.nutch.util.TimingUtil;
 
 /** Maintains an inverted link map, listing incoming links for each url. */
-public class LinkDb extends Configured implements Tool, Mapper<Text, ParseData, Text, Inlinks> {
+public class LinkDb extends Configured implements Tool,
+    Mapper<Text, ParseData, Text, Inlinks> {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDb.class);
 
@@ -54,13 +55,14 @@
   private boolean ignoreInternalLinks;
   private URLFilters urlFilters;
   private URLNormalizers urlNormalizers;
-  
-  public LinkDb() {}
-  
+
+  public LinkDb() {
+  }
+
   public LinkDb(Configuration conf) {
     setConf(conf);
   }
-  
+
   public void configure(JobConf job) {
     maxAnchorLength = job.getInt("db.max.anchor.length", 100);
     ignoreInternalLinks = job.getBoolean("db.ignore.internal.links", true);
@@ -72,16 +74,19 @@
     }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public void map(Text key, ParseData parseData,
-                  OutputCollector<Text, Inlinks> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, Inlinks> output, Reporter reporter)
+      throws IOException {
     String fromUrl = key.toString();
     String fromHost = getHost(fromUrl);
     if (urlNormalizers != null) {
       try {
-        fromUrl = urlNormalizers.normalize(fromUrl, URLNormalizers.SCOPE_LINKDB); // normalize the url
+        // normalize the url
+        fromUrl = urlNormalizers.normalize(fromUrl,
+            URLNormalizers.SCOPE_LINKDB);
       } catch (Exception e) {
         LOG.warn("Skipping " + fromUrl + ":" + e);
         fromUrl = null;
@@ -95,7 +100,8 @@
         fromUrl = null;
       }
     }
-    if (fromUrl == null) return; // discard all outlinks
+    if (fromUrl == null)
+      return; // discard all outlinks
     Outlink[] outlinks = parseData.getOutlinks();
     Inlinks inlinks = new Inlinks();
     for (int i = 0; i < outlinks.length; i++) {
@@ -105,12 +111,14 @@
       if (ignoreInternalLinks) {
         String toHost = getHost(toUrl);
         if (toHost == null || toHost.equals(fromHost)) { // internal link
-          continue;                               // skip it
+          continue; // skip it
         }
       }
       if (urlNormalizers != null) {
         try {
-          toUrl = urlNormalizers.normalize(toUrl, URLNormalizers.SCOPE_LINKDB); // normalize the url
+          // normalize the url
+          toUrl = urlNormalizers.normalize(toUrl,
+              URLNormalizers.SCOPE_LINKDB);
         } catch (Exception e) {
           LOG.warn("Skipping " + toUrl + ":" + e);
           toUrl = null;
@@ -124,13 +132,14 @@
           toUrl = null;
         }
       }
-      if (toUrl == null) continue;
+      if (toUrl == null)
+        continue;
       inlinks.clear();
-      String anchor = outlink.getAnchor();        // truncate long anchors
+      String anchor = outlink.getAnchor(); // truncate long anchors
       if (anchor.length() > maxAnchorLength) {
         anchor = anchor.substring(0, maxAnchorLength);
       }
-      inlinks.add(new Inlink(fromUrl, anchor));   // collect inverted link
+      inlinks.add(new Inlink(fromUrl, anchor)); // collect inverted link
       output.collect(new Text(toUrl), inlinks);
     }
   }
@@ -143,13 +152,16 @@
     }
   }
 
-  public void invert(Path linkDb, final Path segmentsDir, boolean normalize, boolean filter, boolean force) throws IOException {
+  public void invert(Path linkDb, final Path segmentsDir, boolean normalize,
+      boolean filter, boolean force) throws IOException {
     final FileSystem fs = FileSystem.get(getConf());
-    FileStatus[] files = fs.listStatus(segmentsDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
+    FileStatus[] files = fs.listStatus(segmentsDir,
+        HadoopFSUtil.getPassDirectoriesFilter(fs));
     invert(linkDb, HadoopFSUtil.getPaths(files), normalize, filter, force);
   }
 
-  public void invert(Path linkDb, Path[] segments, boolean normalize, boolean filter, boolean force) throws IOException {
+  public void invert(Path linkDb, Path[] segments, boolean normalize,
+      boolean filter, boolean force) throws IOException {
     Path lock = new Path(linkDb, LOCK_NAME);
     FileSystem fs = FileSystem.get(getConf());
     LockUtil.createLockFile(fs, lock, force);
@@ -169,7 +181,8 @@
       if (LOG.isInfoEnabled()) {
         LOG.info("LinkDb: adding segment: " + segments[i]);
       }
-      FileInputFormat.addInputPath(job, new Path(segments[i], ParseData.DIR_NAME));
+      FileInputFormat.addInputPath(job, new Path(segments[i],
+          ParseData.DIR_NAME));
     }
     try {
       JobClient.runJob(job);
@@ -198,13 +211,14 @@
     LinkDb.install(job, linkDb);
 
     long end = System.currentTimeMillis();
-    LOG.info("LinkDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDb: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  private static JobConf createJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
-    Path newLinkDb =
-      new Path("linkdb-" +
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  private static JobConf createJob(Configuration config, Path linkDb,
+      boolean normalize, boolean filter) {
+    Path newLinkDb = new Path("linkdb-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("linkdb " + linkDb);
@@ -242,12 +256,14 @@
     Path old = new Path(linkDb, "old");
     Path current = new Path(linkDb, CURRENT_NAME);
     if (fs.exists(current)) {
-      if (fs.exists(old)) fs.delete(old, true);
+      if (fs.exists(old))
+        fs.delete(old, true);
       fs.rename(current, old);
     }
     fs.mkdirs(linkDb);
     fs.rename(newLinkDb, current);
-    if (fs.exists(old)) fs.delete(old, true);
+    if (fs.exists(old))
+      fs.delete(old, true);
     LockUtil.removeLockFile(fs, new Path(linkDb, LOCK_NAME));
   }
 
@@ -258,11 +274,14 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: LinkDb <linkdb> (-dir <segmentsDir> | <seg1> <seg2> ...) [-force] [-noNormalize] [-noFilter]");
+      System.err
+          .println("Usage: LinkDb <linkdb> (-dir <segmentsDir> | <seg1> <seg2> ...) [-force] [-noNormalize] [-noFilter]");
       System.err.println("\tlinkdb\toutput LinkDb to create or update");
-      System.err.println("\t-dir segmentsDir\tparent directory of several segments, OR");
+      System.err
+          .println("\t-dir segmentsDir\tparent directory of several segments, OR");
       System.err.println("\tseg1 seg2 ...\t list of segment directories");
-      System.err.println("\t-force\tforce update even if LinkDb appears to be locked (CAUTION advised)");
+      System.err
+          .println("\t-force\tforce update even if LinkDb appears to be locked (CAUTION advised)");
       System.err.println("\t-noNormalize\tdon't normalize link URLs");
       System.err.println("\t-noFilter\tdon't apply URLFilters to link URLs");
       return -1;
@@ -276,7 +295,8 @@
     boolean force = false;
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-dir")) {
-        FileStatus[] paths = fs.listStatus(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] paths = fs.listStatus(new Path(args[++i]),
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         segs.addAll(Arrays.asList(HadoopFSUtil.getPaths(paths)));
       } else if (args[i].equalsIgnoreCase("-noNormalize")) {
         normalize = false;
@@ -284,7 +304,8 @@
         filter = false;
       } else if (args[i].equalsIgnoreCase("-force")) {
         force = true;
-      } else segs.add(new Path(args[i]));
+      } else
+        segs.add(new Path(args[i]));
     }
     try {
       invert(db, segs.toArray(new Path[segs.size()]), normalize, filter, force);
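
A short sketch of calling the invert() overload reformatted above; the linkdb and segment paths are made up for the example.

// Illustrative sketch, not part of this patch. The linkdb and segment
// paths are hypothetical; the invert() signature follows the hunk above.
import org.apache.hadoop.fs.Path;
import org.apache.nutch.crawl.LinkDb;
import org.apache.nutch.util.NutchConfiguration;

public class LinkDbInvertExample {
  public static void main(String[] args) throws Exception {
    LinkDb linkDb = new LinkDb(NutchConfiguration.create());
    Path db = new Path("crawl/linkdb");
    Path[] segments = { new Path("crawl/segments/20111024123456") };
    // normalize and filter link URLs; do not force-break an existing lock
    linkDb.invert(db, segments, true, true, false);
  }
}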
Index: src/java/org/apache/nutch/crawl/CrawlDatum.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDatum.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDatum.java	(working copy)
@@ -41,51 +41,50 @@
   private static final byte OLD_STATUS_FETCH_SUCCESS = 5;
   private static final byte OLD_STATUS_FETCH_RETRY = 6;
   private static final byte OLD_STATUS_FETCH_GONE = 7;
-  
+
   private static HashMap<Byte, Byte> oldToNew = new HashMap<Byte, Byte>();
-  
+
   /** Page was not fetched yet. */
-  public static final byte STATUS_DB_UNFETCHED      = 0x01;
+  public static final byte STATUS_DB_UNFETCHED = 0x01;
   /** Page was successfully fetched. */
-  public static final byte STATUS_DB_FETCHED        = 0x02;
+  public static final byte STATUS_DB_FETCHED = 0x02;
   /** Page no longer exists. */
-  public static final byte STATUS_DB_GONE           = 0x03;
+  public static final byte STATUS_DB_GONE = 0x03;
   /** Page temporarily redirects to other page. */
-  public static final byte STATUS_DB_REDIR_TEMP     = 0x04;
+  public static final byte STATUS_DB_REDIR_TEMP = 0x04;
   /** Page permanently redirects to other page. */
-  public static final byte STATUS_DB_REDIR_PERM     = 0x05;
+  public static final byte STATUS_DB_REDIR_PERM = 0x05;
   /** Page was successfully fetched and found not modified. */
-  public static final byte STATUS_DB_NOTMODIFIED    = 0x06;
-  
+  public static final byte STATUS_DB_NOTMODIFIED = 0x06;
+
   /** Maximum value of DB-related status. */
-  public static final byte STATUS_DB_MAX            = 0x1f;
-  
+  public static final byte STATUS_DB_MAX = 0x1f;
+
   /** Fetching was successful. */
-  public static final byte STATUS_FETCH_SUCCESS     = 0x21;
+  public static final byte STATUS_FETCH_SUCCESS = 0x21;
   /** Fetching unsuccessful, needs to be retried (transient errors). */
-  public static final byte STATUS_FETCH_RETRY       = 0x22;
+  public static final byte STATUS_FETCH_RETRY = 0x22;
   /** Fetching temporarily redirected to other page. */
-  public static final byte STATUS_FETCH_REDIR_TEMP  = 0x23;
+  public static final byte STATUS_FETCH_REDIR_TEMP = 0x23;
   /** Fetching permanently redirected to other page. */
-  public static final byte STATUS_FETCH_REDIR_PERM  = 0x24;
+  public static final byte STATUS_FETCH_REDIR_PERM = 0x24;
   /** Fetching unsuccessful - page is gone. */
-  public static final byte STATUS_FETCH_GONE        = 0x25;
+  public static final byte STATUS_FETCH_GONE = 0x25;
   /** Fetching successful - page is not modified. */
   public static final byte STATUS_FETCH_NOTMODIFIED = 0x26;
-  
+
   /** Maximum value of fetch-related status. */
-  public static final byte STATUS_FETCH_MAX         = 0x3f;
-  
+  public static final byte STATUS_FETCH_MAX = 0x3f;
+
   /** Page signature. */
-  public static final byte STATUS_SIGNATURE         = 0x41;
+  public static final byte STATUS_SIGNATURE = 0x41;
   /** Page was newly injected. */
-  public static final byte STATUS_INJECTED          = 0x42;
+  public static final byte STATUS_INJECTED = 0x42;
   /** Page discovered through a link. */
-  public static final byte STATUS_LINKED            = 0x43;
+  public static final byte STATUS_LINKED = 0x43;
   /** Page got metadata from a parser */
-  public static final byte STATUS_PARSE_META        = 0x44;
-  
-  
+  public static final byte STATUS_PARSE_META = 0x44;
+
   public static final HashMap<Byte, String> statNames = new HashMap<Byte, String>();
   static {
     statNames.put(STATUS_DB_UNFETCHED, "db_unfetched");
@@ -104,7 +103,7 @@
     statNames.put(STATUS_FETCH_GONE, "fetch_gone");
     statNames.put(STATUS_FETCH_NOTMODIFIED, "fetch_notmodified");
     statNames.put(STATUS_PARSE_META, "parse_metadata");
-    
+
     oldToNew.put(OLD_STATUS_DB_UNFETCHED, STATUS_DB_UNFETCHED);
     oldToNew.put(OLD_STATUS_DB_FETCHED, STATUS_DB_FETCHED);
     oldToNew.put(OLD_STATUS_DB_GONE, STATUS_DB_GONE);
@@ -123,22 +122,25 @@
   private byte[] signature = null;
   private long modifiedTime;
   private org.apache.hadoop.io.MapWritable metaData;
-  
+
   public static boolean hasDbStatus(CrawlDatum datum) {
-    if (datum.status <= STATUS_DB_MAX) return true;
+    if (datum.status <= STATUS_DB_MAX)
+      return true;
     return false;
   }
 
   public static boolean hasFetchStatus(CrawlDatum datum) {
-    if (datum.status > STATUS_DB_MAX && datum.status <= STATUS_FETCH_MAX) return true;
+    if (datum.status > STATUS_DB_MAX && datum.status <= STATUS_FETCH_MAX)
+      return true;
     return false;
   }
 
-  public CrawlDatum() { }
+  public CrawlDatum() {
+  }
 
   public CrawlDatum(int status, int fetchInterval) {
     this();
-    this.status = (byte)status;
+    this.status = (byte) status;
     this.fetchInterval = fetchInterval;
   }
 
@@ -151,26 +153,36 @@
   // accessor methods
   //
 
-  public byte getStatus() { return status; }
-  
+  public byte getStatus() {
+    return status;
+  }
+
   public static String getStatusName(byte value) {
     String res = statNames.get(value);
-    if (res == null) res = "unknown";
+    if (res == null)
+      res = "unknown";
     return res;
   }
-  
-  public void setStatus(int status) { this.status = (byte)status; }
 
+  public void setStatus(int status) {
+    this.status = (byte) status;
+  }
+
   /**
    * Returns either the time of the last fetch, or the next fetch time,
    * depending on whether Fetcher or CrawlDbReducer set the time.
    */
-  public long getFetchTime() { return fetchTime; }
+  public long getFetchTime() {
+    return fetchTime;
+  }
+
   /**
-   * Sets either the time of the last fetch or the next fetch time,
-   * depending on whether Fetcher or CrawlDbReducer set the time.
+   * Sets either the time of the last fetch or the next fetch time, depending on
+   * whether Fetcher or CrawlDbReducer set the time.
    */
-  public void setFetchTime(long fetchTime) { this.fetchTime = fetchTime; }
+  public void setFetchTime(long fetchTime) {
+    this.fetchTime = fetchTime;
+  }
 
   public long getModifiedTime() {
     return modifiedTime;
@@ -179,54 +191,72 @@
   public void setModifiedTime(long modifiedTime) {
     this.modifiedTime = modifiedTime;
   }
-  
-  public byte getRetriesSinceFetch() { return retries; }
-  public void setRetriesSinceFetch(int retries) {this.retries = (byte)retries;}
 
-  public int getFetchInterval() { return fetchInterval; }
+  public byte getRetriesSinceFetch() {
+    return retries;
+  }
+
+  public void setRetriesSinceFetch(int retries) {
+    this.retries = (byte) retries;
+  }
+
+  public int getFetchInterval() {
+    return fetchInterval;
+  }
+
   public void setFetchInterval(int fetchInterval) {
     this.fetchInterval = fetchInterval;
   }
+
   public void setFetchInterval(float fetchInterval) {
     this.fetchInterval = Math.round(fetchInterval);
   }
 
-  public float getScore() { return score; }
-  public void setScore(float score) { this.score = score; }
+  public float getScore() {
+    return score;
+  }
 
+  public void setScore(float score) {
+    this.score = score;
+  }
+
   public byte[] getSignature() {
     return signature;
   }
 
   public void setSignature(byte[] signature) {
     if (signature != null && signature.length > 256)
-      throw new RuntimeException("Max signature length (256) exceeded: " + signature.length);
+      throw new RuntimeException("Max signature length (256) exceeded: "
+          + signature.length);
     this.signature = signature;
   }
-  
-   public void setMetaData(org.apache.hadoop.io.MapWritable mapWritable) {
-     this.metaData = new org.apache.hadoop.io.MapWritable(mapWritable);
-   }
-   
-   /** Add all metadata from other CrawlDatum to this CrawlDatum.
-    * 
-    * @param other CrawlDatum
-    */
-   public void putAllMetaData(CrawlDatum other) {
-     for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
-       getMetaData().put(e.getKey(), e.getValue());
-     }
-   }
 
+  public void setMetaData(org.apache.hadoop.io.MapWritable mapWritable) {
+    this.metaData = new org.apache.hadoop.io.MapWritable(mapWritable);
+  }
+
   /**
-   * returns a MapWritable if it was set or read in @see readFields(DataInput), 
-   * returns empty map in case CrawlDatum was freshly created (lazily instantiated).
+   * Add all metadata from other CrawlDatum to this CrawlDatum.
+   * 
+   * @param other
+   *          CrawlDatum
    */
+  public void putAllMetaData(CrawlDatum other) {
+    for (Entry<Writable, Writable> e : other.getMetaData().entrySet()) {
+      getMetaData().put(e.getKey(), e.getValue());
+    }
+  }
+
+  /**
+   * Returns a MapWritable if it was set or read in
+   * {@link #readFields(DataInput)}; returns an empty map in case the
+   * CrawlDatum was freshly created (lazily instantiated).
+   */
   public org.apache.hadoop.io.MapWritable getMetaData() {
-    if (this.metaData == null) this.metaData = new org.apache.hadoop.io.MapWritable();
+    if (this.metaData == null)
+      this.metaData = new org.apache.hadoop.io.MapWritable();
     return this.metaData;
   }
-  
 
   //
   // writable methods
@@ -239,8 +269,8 @@
   }
 
   public void readFields(DataInput in) throws IOException {
-    byte version = in.readByte();                 // read version
-    if (version > CUR_VERSION)                   // check version
+    byte version = in.readByte(); // read version
+    if (version > CUR_VERSION) // check version
       throw new VersionMismatchException(CUR_VERSION, version);
 
     status = in.readByte();
@@ -248,7 +278,8 @@
     retries = in.readByte();
     if (version > 5) {
       fetchInterval = in.readInt();
-    } else fetchInterval = Math.round(in.readFloat());
+    } else
+      fetchInterval = Math.round(in.readFloat());
     score = in.readFloat();
     if (version > 2) {
       modifiedTime = in.readLong();
@@ -256,9 +287,10 @@
       if (cnt > 0) {
         signature = new byte[cnt];
         in.readFully(signature);
-      } else signature = null;
+      } else
+        signature = null;
     }
-    
+
     if (version > 3) {
       boolean hasMetadata = false;
       if (version < 7) {
@@ -278,7 +310,8 @@
           metaData.readFields(in);
         }
       }
-      if (hasMetadata==false) metaData = null;
+      if (hasMetadata == false)
+        metaData = null;
     }
     // translate status codes
     if (version < 5) {
@@ -286,7 +319,7 @@
         status = oldToNew.get(status);
       else
         status = STATUS_DB_UNFETCHED;
-      
+
     }
   }
 
@@ -295,7 +328,7 @@
   private static final int SIG_OFFSET = SCORE_OFFSET + 4 + 8;
 
   public void write(DataOutput out) throws IOException {
-    out.writeByte(CUR_VERSION);                   // store current version
+    out.writeByte(CUR_VERSION); // store current version
     out.writeByte(status);
     out.writeLong(fetchTime);
     out.writeByte(retries);
@@ -326,17 +359,19 @@
     this.modifiedTime = that.modifiedTime;
     this.signature = that.signature;
     if (that.metaData != null) {
-      this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData); // make a deep copy
+      // make a deep copy
+      this.metaData = new org.apache.hadoop.io.MapWritable(that.metaData);
     } else {
       this.metaData = null;
     }
   }
 
-
   //
   // compare methods
   //
-  
+
   /** Sort by decreasing score. */
   public int compareTo(CrawlDatum that) {
     if (that.score != this.score)
@@ -354,47 +389,49 @@
     return SignatureComparator._compare(this, that);
   }
 
-  /** A Comparator optimized for CrawlDatum. */ 
+  /** A Comparator optimized for CrawlDatum. */
   public static class Comparator extends WritableComparator {
-    public Comparator() { super(CrawlDatum.class); }
+    public Comparator() {
+      super(CrawlDatum.class);
+    }
 
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-      float score1 = readFloat(b1,s1+SCORE_OFFSET);
-      float score2 = readFloat(b2,s2+SCORE_OFFSET);
+      float score1 = readFloat(b1, s1 + SCORE_OFFSET);
+      float score2 = readFloat(b2, s2 + SCORE_OFFSET);
       if (score2 != score1) {
         return (score2 - score1) > 0 ? 1 : -1;
       }
-      int status1 = b1[s1+1];
-      int status2 = b2[s2+1];
+      int status1 = b1[s1 + 1];
+      int status2 = b2[s2 + 1];
       if (status2 != status1)
         return status1 - status2;
-      long fetchTime1 = readLong(b1, s1+1+1);
-      long fetchTime2 = readLong(b2, s2+1+1);
+      long fetchTime1 = readLong(b1, s1 + 1 + 1);
+      long fetchTime2 = readLong(b2, s2 + 1 + 1);
       if (fetchTime2 != fetchTime1)
         return (fetchTime2 - fetchTime1) > 0 ? 1 : -1;
-      int retries1 = b1[s1+1+1+8];
-      int retries2 = b2[s2+1+1+8];
+      int retries1 = b1[s1 + 1 + 1 + 8];
+      int retries2 = b2[s2 + 1 + 1 + 8];
       if (retries2 != retries1)
         return retries2 - retries1;
-      int fetchInterval1 = readInt(b1, s1+1+1+8+1);
-      int fetchInterval2 = readInt(b2, s2+1+1+8+1);
+      int fetchInterval1 = readInt(b1, s1 + 1 + 1 + 8 + 1);
+      int fetchInterval2 = readInt(b2, s2 + 1 + 1 + 8 + 1);
       if (fetchInterval2 != fetchInterval1)
         return (fetchInterval2 - fetchInterval1) > 0 ? 1 : -1;
       long modifiedTime1 = readLong(b1, s1 + SCORE_OFFSET + 4);
       long modifiedTime2 = readLong(b2, s2 + SCORE_OFFSET + 4);
       if (modifiedTime2 != modifiedTime1)
         return (modifiedTime2 - modifiedTime1) > 0 ? 1 : -1;
-      int sigl1 = b1[s1+SIG_OFFSET];
-      int sigl2 = b2[s2+SIG_OFFSET];
-      return SignatureComparator._compare(b1, SIG_OFFSET, sigl1, b2, SIG_OFFSET, sigl2);
+      int sigl1 = b1[s1 + SIG_OFFSET];
+      int sigl2 = b2[s2 + SIG_OFFSET];
+      return SignatureComparator._compare(b1, SIG_OFFSET, sigl1, b2,
+          SIG_OFFSET, sigl2);
     }
   }
 
-  static {                                        // register this comparator
+  static { // register this comparator
     WritableComparator.define(CrawlDatum.class, new Comparator());
   }
 
-
   //
   // basic methods
   //
@@ -402,12 +439,13 @@
   public String toString() {
     StringBuilder buf = new StringBuilder();
     buf.append("Version: " + CUR_VERSION + "\n");
-    buf.append("Status: " + getStatus() + " (" + getStatusName(getStatus()) + ")\n");
+    buf.append("Status: " + getStatus() + " (" + getStatusName(getStatus())
+        + ")\n");
     buf.append("Fetch time: " + new Date(getFetchTime()) + "\n");
     buf.append("Modified time: " + new Date(getModifiedTime()) + "\n");
     buf.append("Retries since fetch: " + getRetriesSinceFetch() + "\n");
-    buf.append("Retry interval: " + getFetchInterval() + " seconds (" +
-        (getFetchInterval() / FetchSchedule.SECONDS_PER_DAY) + " days)\n");
+    buf.append("Retry interval: " + getFetchInterval() + " seconds ("
+        + (getFetchInterval() / FetchSchedule.SECONDS_PER_DAY) + " days)\n");
     buf.append("Score: " + getScore() + "\n");
     buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
     buf.append("Metadata: ");
@@ -421,35 +459,35 @@
     buf.append('\n');
     return buf.toString();
   }
-  
+
   private boolean metadataEquals(org.apache.hadoop.io.MapWritable otherMetaData) {
-    if (metaData==null || metaData.size() ==0) {
+    if (metaData == null || metaData.size() == 0) {
       return otherMetaData == null || otherMetaData.size() == 0;
     }
     if (otherMetaData == null) {
       // we already know that the current object is not null or empty
       return false;
     }
-    HashSet<Entry<Writable, Writable>> set1 =
-      new HashSet<Entry<Writable,Writable>>(metaData.entrySet());
-    HashSet<Entry<Writable, Writable>> set2 =
-      new HashSet<Entry<Writable,Writable>>(otherMetaData.entrySet());
+    HashSet<Entry<Writable, Writable>> set1 = new HashSet<Entry<Writable, Writable>>(
+        metaData.entrySet());
+    HashSet<Entry<Writable, Writable>> set2 = new HashSet<Entry<Writable, Writable>>(
+        otherMetaData.entrySet());
     return set1.equals(set2);
   }
 
   public boolean equals(Object o) {
     if (!(o instanceof CrawlDatum))
       return false;
-    CrawlDatum other = (CrawlDatum)o;
-    boolean res =
-      (this.status == other.status) &&
-      (this.fetchTime == other.fetchTime) &&
-      (this.modifiedTime == other.modifiedTime) &&
-      (this.retries == other.retries) &&
-      (this.fetchInterval == other.fetchInterval) &&
-      (SignatureComparator._compare(this.signature, other.signature) == 0) &&
-      (this.score == other.score);
-    if (!res) return res;
+    CrawlDatum other = (CrawlDatum) o;
+    boolean res = (this.status == other.status)
+        && (this.fetchTime == other.fetchTime)
+        && (this.modifiedTime == other.modifiedTime)
+        && (this.retries == other.retries)
+        && (this.fetchInterval == other.fetchInterval)
+        && (SignatureComparator._compare(this.signature, other.signature) == 0)
+        && (this.score == other.score);
+    if (!res)
+      return res;
     return metadataEquals(other.metaData);
   }
 
@@ -457,20 +495,14 @@
     int res = 0;
     if (signature != null) {
       for (int i = 0; i < signature.length / 4; i += 4) {
-        res ^= (int)(signature[i] << 24 + signature[i+1] << 16 +
-                signature[i+2] << 8 + signature[i+3]);
+        res ^= (int) (signature[i] << 24 + signature[i + 1] << 16 + signature[i + 2] << 8 + signature[i + 3]);
       }
     }
     if (metaData != null) {
       res ^= metaData.entrySet().hashCode();
     }
-    return
-      res ^ status ^
-      ((int)fetchTime) ^
-      ((int)modifiedTime) ^
-      retries ^
-      fetchInterval ^
-      Float.floatToIntBits(score);
+    return res ^ status ^ ((int) fetchTime) ^ ((int) modifiedTime) ^ retries
+        ^ fetchInterval ^ Float.floatToIntBits(score);
   }
 
   public Object clone() {
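
To make the reformatted constants and accessors above concrete, a small sketch that builds a CrawlDatum and exercises the status helpers; the values are arbitrary.

// Illustrative sketch, not part of this patch. Values are arbitrary;
// the constants and accessors are those shown in the hunks above.
import org.apache.nutch.crawl.CrawlDatum;

public class CrawlDatumExample {
  public static void main(String[] args) {
    // db_unfetched, re-fetch interval of 30 days (in seconds)
    CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
        30 * 24 * 3600);
    datum.setFetchTime(System.currentTimeMillis());
    datum.setScore(1.0f);
    System.out.println(CrawlDatum.getStatusName(datum.getStatus())); // db_unfetched
    System.out.println(CrawlDatum.hasDbStatus(datum));    // true
    System.out.println(CrawlDatum.hasFetchStatus(datum)); // false
    System.out.println(datum); // multi-line dump via toString()
  }
}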
Index: src/java/org/apache/nutch/crawl/LinkDbMerger.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbMerger.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDbMerger.java	(working copy)
@@ -46,37 +46,44 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This tool merges several LinkDb-s into one, optionally filtering
- * URLs through the current URLFilters, to skip prohibited URLs and
- * links.
+ * This tool merges several LinkDb-s into one, optionally filtering URLs through
+ * the current URLFilters, to skip prohibited URLs and links.
  * 
- * <p>It's possible to use this tool just for filtering - in that case
- * only one LinkDb should be specified in arguments.</p>
- * <p>If more than one LinkDb contains information about the same URL,
- * all inlinks are accumulated, but only at most <code>db.max.inlinks</code>
- * inlinks will ever be added.</p>
- * <p>If activated, URLFilters will be applied to both the target URLs and
- * to any incoming link URL. If a target URL is prohibited, all
- * inlinks to that target will be removed, including the target URL. If
- * some of incoming links are prohibited, only they will be removed, and they
- * won't count when checking the above-mentioned maximum limit.
+ * <p>
+ * It's possible to use this tool just for filtering - in that case only one
+ * LinkDb should be specified in arguments.
+ * </p>
+ * <p>
+ * If more than one LinkDb contains information about the same URL, all inlinks
+ * are accumulated, but only at most <code>db.max.inlinks</code> inlinks will
+ * ever be added.
+ * </p>
+ * <p>
+ * If activated, URLFilters will be applied to both the target URLs and to any
+ * incoming link URL. If a target URL is prohibited, all inlinks to that target
+ * will be removed, including the target URL. If some of the incoming links are
+ * prohibited, only those will be removed, and they won't count when checking
+ * the above-mentioned maximum limit.
  * 
  * @author Andrzej Bialecki
  */
-public class LinkDbMerger extends Configured implements Tool, Reducer<Text, Inlinks, Text, Inlinks> {
+public class LinkDbMerger extends Configured implements Tool,
+    Reducer<Text, Inlinks, Text, Inlinks> {
   private static final Logger LOG = LoggerFactory.getLogger(LinkDbMerger.class);
-  
+
   private int maxInlinks;
-  
+
   public LinkDbMerger() {
-    
+
   }
-  
+
   public LinkDbMerger(Configuration conf) {
     setConf(conf);
   }
 
-  public void reduce(Text key, Iterator<Inlinks> values, OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {
+  public void reduce(Text key, Iterator<Inlinks> values,
+      OutputCollector<Text, Inlinks> output, Reporter reporter)
+      throws IOException {
 
     Inlinks result = new Inlinks();
 
@@ -86,43 +93,48 @@
       int end = Math.min(maxInlinks - result.size(), inlinks.size());
       Iterator<Inlink> it = inlinks.iterator();
       int i = 0;
-      while(it.hasNext() && i++ < end) {
+      while (it.hasNext() && i++ < end) {
         result.add(it.next());
       }
     }
-    if (result.size() == 0) return;
+    if (result.size() == 0)
+      return;
     output.collect(key, result);
-    
+
   }
 
   public void configure(JobConf job) {
     maxInlinks = job.getInt("db.max.inlinks", 10000);
   }
 
-  public void close() throws IOException { }
+  public void close() throws IOException {
+  }
 
-  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
+  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
+      throws Exception {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("LinkDb merge: starting at " + sdf.format(start));
 
     JobConf job = createMergeJob(getConf(), output, normalize, filter);
     for (int i = 0; i < dbs.length; i++) {
-      FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));      
+      FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
     }
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
-    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, LinkDb.CURRENT_NAME));
+    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
+        LinkDb.CURRENT_NAME));
 
     long end = System.currentTimeMillis();
-    LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static JobConf createMergeJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {
-    Path newLinkDb =
-      new Path("linkdb-merge-" + 
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  public static JobConf createMergeJob(Configuration config, Path linkDb,
+      boolean normalize, boolean filter) {
+    Path newLinkDb = new Path("linkdb-merge-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("linkdb merge " + linkDb);
@@ -145,22 +157,27 @@
 
     return job;
   }
-  
+
   /**
    * @param args
    */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbMerger(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbMerger(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: LinkDbMerger <output_linkdb> <linkdb1> [<linkdb2> <linkdb3> ...] [-normalize] [-filter]");
+      System.err
+          .println("Usage: LinkDbMerger <output_linkdb> <linkdb1> [<linkdb2> <linkdb3> ...] [-normalize] [-filter]");
       System.err.println("\toutput_linkdb\toutput LinkDb");
-      System.err.println("\tlinkdb1 ...\tinput LinkDb-s (single input LinkDb is ok)");
-      System.err.println("\t-normalize\tuse URLNormalizer on both fromUrls and toUrls in linkdb(s) (usually not needed)");
-      System.err.println("\t-filter\tuse URLFilters on both fromUrls and toUrls in linkdb(s)");
+      System.err
+          .println("\tlinkdb1 ...\tinput LinkDb-s (single input LinkDb is ok)");
+      System.err
+          .println("\t-normalize\tuse URLNormalizer on both fromUrls and toUrls in linkdb(s) (usually not needed)");
+      System.err
+          .println("\t-filter\tuse URLFilters on both fromUrls and toUrls in linkdb(s)");
       return -1;
     }
     Path output = new Path(args[0]);
@@ -172,7 +189,8 @@
         filter = true;
       } else if (args[i].equals("-normalize")) {
         normalize = true;
-      } else dbs.add(new Path(args[i]));
+      } else
+        dbs.add(new Path(args[i]));
     }
     try {
       merge(output, dbs.toArray(new Path[dbs.size()]), normalize, filter);
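
A minimal sketch of invoking the merge tool described above through ToolRunner, mirroring its own main(); the linkdb paths are hypothetical.

// Illustrative sketch, not part of this patch. Paths are hypothetical;
// the invocation mirrors LinkDbMerger.main() shown above.
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.LinkDbMerger;
import org.apache.nutch.util.NutchConfiguration;

public class LinkDbMergerExample {
  public static void main(String[] args) throws Exception {
    // merge two linkdbs, applying URLFilters to both fromUrls and toUrls
    String[] toolArgs = { "crawl/linkdb-merged", "crawl/linkdb1",
        "crawl/linkdb2", "-filter" };
    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbMerger(),
        toolArgs);
    System.exit(res);
  }
}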
Index: src/java/org/apache/nutch/crawl/FetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/FetchSchedule.java	(working copy)
@@ -21,134 +21,184 @@
 import org.apache.hadoop.io.Text;
 
 /**
- * This interface defines the contract for implementations that manipulate
- * fetch times and re-fetch intervals.
+ * This interface defines the contract for implementations that manipulate fetch
+ * times and re-fetch intervals.
  * 
  * @author Andrzej Bialecki
  */
 public interface FetchSchedule extends Configurable {
-  
+
   /** It is unknown whether page was changed since our last visit. */
-  public static final int STATUS_UNKNOWN       = 0;
+  public static final int STATUS_UNKNOWN = 0;
   /** Page is known to have been modified since our last visit. */
-  public static final int STATUS_MODIFIED      = 1;
+  public static final int STATUS_MODIFIED = 1;
   /** Page is known to remain unmodified since our last visit. */
-  public static final int STATUS_NOTMODIFIED    = 2;
-  
+  public static final int STATUS_NOTMODIFIED = 2;
+
   public static final int SECONDS_PER_DAY = 3600 * 24;
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation set the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
    * 
-   * @param url URL of the page.
-   * @param datum datum instance to be initialized.
+   * @param url
+   *          URL of the page.
+   * @param datum
+   *          datum instance to be initialized.
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum initializeSchedule(Text url, CrawlDatum datum);
-  
+
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page.
-   * Implementations may use supplied arguments to support different re-fetching
-   * schedules.
+   * successfully fetched page. Implementations may use supplied arguments to
+   * support different re-fetching schedules.
    * 
-   * @param url url of the page
-   * @param datum page description to be adjusted. NOTE: this instance, passed by reference,
-   * may be modified inside the method.
-   * @param prevFetchTime previous value of fetch time, or 0 if not available
-   * @param prevModifiedTime previous value of modifiedTime, or 0 if not available
-   * @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
-   * implementations should update the value in {@param datum} to something greater than this value.
-   * @param modifiedTime last time the content was modified. This information comes from
-   * the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
-   * implementations should update the value in {@param datum} to this value.
-   * @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
-   * <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
-   * This information may be obtained by comparing page signatures before and after fetching. If this
-   * is set to {@link #STATUS_UNKNOWN}, then it is unknown whether the page was changed; implementations
-   * are free to follow a sensible default behavior.
-   * @return adjusted page information, including all original information. NOTE: this may
-   * be a different instance than {@param datum}, but implementations should make sure that
-   * it contains at least all information from {@param datum}.
+   * @param url
+   *          url of the page
+   * @param datum
+   *          page description to be adjusted. NOTE: this instance, passed by
+   *          reference, may be modified inside the method.
+   * @param prevFetchTime
+   *          previous value of fetch time, or 0 if not available
+   * @param prevModifiedTime
+   *          previous value of modifiedTime, or 0 if not available
+   * @param fetchTime
+   *          the latest time, when the page was recently re-fetched. Most
+   *          FetchSchedule implementations should update the value in
+   *          {@code datum} to something greater than this value.
+   * @param modifiedTime
+   *          last time the content was modified. This information comes from
+   *          the protocol implementations, or is set to < 0 if not available.
+   *          Most FetchSchedule implementations should update the value in
+   *          {@code datum} to this value.
+   * @param state
+   *          if {@link #STATUS_MODIFIED}, then the content is considered to be
+   *          "changed" before the <code>fetchTime</code>, if
+   *          {@link #STATUS_NOTMODIFIED} then the content is known to be
+   *          unchanged. This information may be obtained by comparing page
+   *          signatures before and after fetching. If this is set to
+   *          {@link #STATUS_UNKNOWN}, then it is unknown whether the page was
+   *          changed; implementations are free to follow a sensible default
+   *          behavior.
+   * @return adjusted page information, including all original information.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state);
-  
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state);
+
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
-   * @param datum datum instance to be adjusted
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance to be adjusted
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime);
-  
+      long prevFetchTime, long prevModifiedTime, long fetchTime);
+
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases the
-   * retry counter.
-   * @param url URL of the page
-   * @param datum page information
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          page information
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime);
-  
+      long prevFetchTime, long prevModifiedTime, long fetchTime);
+
   /**
    * Calculates last fetch time of the given CrawlDatum.
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(CrawlDatum datum);
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code),
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information whether the page is suitable for selection
+   * in the current fetchlist. NOTE: a true return value does not guarantee that
+   * the page will be fetched, it just allows it to be included in the further
+   * selection process based on scores. The default implementation checks
+   * <code>fetchTime</code>: if it is higher than {@code curTime} it returns
+   * false, and true otherwise. It will also check that fetchTime is not too
+   * remote (more than <code>maxInterval</code>), in which case it lowers the
+   * interval and returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(Text url, CrawlDatum datum, long curTime);
-  
+
   /**
-   * This method resets fetchTime, fetchInterval, modifiedTime and
-   * page signature, so that it forces refetching.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * This method resets fetchTime, fetchInterval, modifiedTime and page
+   * signature, so that it forces refetching.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than {@code datum}, but
+   *         implementations should make sure that it contains at least all
+   *         information from {@code datum}.
    */
   public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap);
 }
Index: src/java/org/apache/nutch/crawl/MD5Signature.java
===================================================================
--- src/java/org/apache/nutch/crawl/MD5Signature.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/MD5Signature.java	(working copy)
@@ -22,9 +22,9 @@
 import org.apache.nutch.protocol.Content;
 
 /**
- * Default implementation of a page signature. It calculates an MD5 hash
- * of the raw binary content of a page. In case there is no content, it
- * calculates a hash from the page's URL.
+ * Default implementation of a page signature. It calculates an MD5 hash of the
+ * raw binary content of a page. In case there is no content, it calculates a
+ * hash from the page's URL.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
@@ -32,7 +32,8 @@
 
   public byte[] calculate(Content content, Parse parse) {
     byte[] data = content.getContent();
-    if (data == null) data = content.getUrl().getBytes();
+    if (data == null)
+      data = content.getUrl().getBytes();
     return MD5Hash.digest(data).getDigest();
   }
 }
Index: src/java/org/apache/nutch/crawl/MapWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/MapWritable.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/MapWritable.java	(working copy)
@@ -47,15 +47,15 @@
 import org.apache.nutch.protocol.ProtocolStatus;
 
 /**
- * A writable map, with a similar behavior as <code>java.util.HashMap</code>.
- * In addition to the size of key and value writable tuple two additional bytes
- * are stored to identify the Writable classes. This means that a maximum of
- * 255 different class types can be used for key and value objects.
- * A binary-id to class mapping is defined in a static block of this class.
- * However it is possible to use custom implementations of Writable.
- * For these custom Writables we write the byte id - utf class name tuple
- * into the header of each MapWritable that uses these types.
- *
+ * A writable map, with a similar behavior as <code>java.util.HashMap</code>. In
+ * addition to the size of key and value writable tuple two additional bytes are
+ * stored to identify the Writable classes. This means that a maximum of 255
+ * different class types can be used for key and value objects. A binary-id to
+ * class mapping is defined in a static block of this class. However it is
+ * possible to use custom implementations of Writable. For these custom
+ * Writables we write the byte id - utf class name tuple into the header of each
+ * MapWritable that uses these types.
+ * 
  * @author Stefan Groschupf
  * @deprecated Use org.apache.hadoop.io.MapWritable instead.
  */
@@ -105,14 +105,16 @@
     CLASS_ID_MAP.put(clazz, byteId);
     ID_CLASS_MAP.put(byteId, clazz);
   }
-  
-  public MapWritable() { }
-  
+
+  public MapWritable() {
+  }
+
   /**
    * Copy constructor. This constructor makes a deep copy, using serialization /
    * deserialization to break any possible references to contained objects.
    * 
-   * @param map map to copy from
+   * @param map
+   *          map to copy from
    */
   public MapWritable(MapWritable map) {
     if (map != null) {
@@ -123,8 +125,8 @@
         dib.reset(dob.getData(), dob.getLength());
         readFields(dib);
       } catch (IOException e) {
-        throw new IllegalArgumentException("this map cannot be copied: " +
-                StringUtils.stringifyException(e));
+        throw new IllegalArgumentException("this map cannot be copied: "
+            + StringUtils.stringifyException(e));
       }
     }
   }
@@ -177,7 +179,8 @@
 
   public Set<Writable> keySet() {
     HashSet<Writable> set = new HashSet<Writable>();
-    if (isEmpty()) return set;
+    if (isEmpty())
+      return set;
     set.add(fFirst.fKey);
     KeyValueEntry entry = fFirst;
     while ((entry = entry.fNextEntry) != null) {
@@ -257,7 +260,8 @@
   public boolean equals(Object obj) {
     if (obj instanceof MapWritable) {
       MapWritable map = (MapWritable) obj;
-      if (fSize != map.fSize) return false;
+      if (fSize != map.fSize)
+        return false;
       HashSet<KeyValueEntry> set1 = new HashSet<KeyValueEntry>();
       KeyValueEntry e1 = fFirst;
       while (e1 != null) {
@@ -345,7 +349,7 @@
           clazz = Class.forName(Text.readString(in));
           addIdEntry(id, clazz);
         } catch (Exception e) {
-          if (LOG.isWarnEnabled()) { 
+          if (LOG.isWarnEnabled()) {
             LOG.warn("Unable to load internal map entry" + e.toString());
           }
           fIdCount--;
@@ -364,8 +368,8 @@
           }
         } catch (IOException e) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Unable to load meta data entry, ignoring.. : "  +
-                     e.toString());
+            LOG.warn("Unable to load meta data entry, ignoring.. : "
+                + e.toString());
           }
           fSize--;
         }
Index: src/java/org/apache/nutch/crawl/Inlinks.java
===================================================================
--- src/java/org/apache/nutch/crawl/Inlinks.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Inlinks.java	(working copy)
@@ -27,18 +27,26 @@
 public class Inlinks implements Writable {
   private HashSet<Inlink> inlinks = new HashSet<Inlink>(1);
 
-  public void add(Inlink inlink) { inlinks.add(inlink); }
+  public void add(Inlink inlink) {
+    inlinks.add(inlink);
+  }
 
-  public void add(Inlinks inlinks) { this.inlinks.addAll(inlinks.inlinks); }
+  public void add(Inlinks inlinks) {
+    this.inlinks.addAll(inlinks.inlinks);
+  }
 
   public Iterator<Inlink> iterator() {
     return this.inlinks.iterator();
   }
-  
-  public int size() { return inlinks.size(); }
 
-  public void clear() { inlinks.clear(); }
+  public int size() {
+    return inlinks.size();
+  }
 
+  public void clear() {
+    inlinks.clear();
+  }
+
   public void readFields(DataInput in) throws IOException {
     int length = in.readInt();
     inlinks.clear();
@@ -67,30 +75,32 @@
     return buffer.toString();
   }
 
-  /** Return the set of anchor texts.  Only a single anchor with a given text
-   * is permitted from a given domain. */
+  /**
+   * Return the set of anchor texts. Only a single anchor with a given text is
+   * permitted from a given domain.
+   */
   public String[] getAnchors() {
-    HashMap<String, Set<String>> domainToAnchors =
-      new HashMap<String, Set<String>>();
+    HashMap<String, Set<String>> domainToAnchors = new HashMap<String, Set<String>>();
     ArrayList<String> results = new ArrayList<String>();
     Iterator<Inlink> it = inlinks.iterator();
     while (it.hasNext()) {
       Inlink inlink = it.next();
       String anchor = inlink.getAnchor();
 
-      if (anchor.length() == 0)                   // skip empty anchors
+      if (anchor.length() == 0) // skip empty anchors
         continue;
-      String domain = null;                       // extract domain name
+      String domain = null; // extract domain name
       try {
         domain = new URL(inlink.getFromUrl()).getHost();
-      } catch (MalformedURLException e) {}
+      } catch (MalformedURLException e) {
+      }
       Set<String> domainAnchors = domainToAnchors.get(domain);
       if (domainAnchors == null) {
         domainAnchors = new HashSet<String>();
         domainToAnchors.put(domain, domainAnchors);
       }
-      if (domainAnchors.add(anchor)) {            // new anchor from domain
-        results.add(anchor);                      // collect it
+      if (domainAnchors.add(anchor)) { // new anchor from domain
+        results.add(anchor); // collect it
       }
     }
 
Index: src/java/org/apache/nutch/crawl/Crawl.java
===================================================================
--- src/java/org/apache/nutch/crawl/Crawl.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Crawl.java	(working copy)
@@ -43,24 +43,26 @@
   public static final Logger LOG = LoggerFactory.getLogger(Crawl.class);
 
   private static String getDate() {
-    return new SimpleDateFormat("yyyyMMddHHmmss").format
-      (new Date(System.currentTimeMillis()));
+    return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date(System
+        .currentTimeMillis()));
   }
 
-
-  /* Perform complete crawling and indexing (to Solr) given a set of root urls and the -solr
-     parameter respectively. More information and Usage parameters can be found below. */
+  /*
+   * Perform complete crawling and indexing (to Solr) given a set of root urls
+   * and the -solr parameter respectively. More information and Usage parameters
+   * can be found below.
+   */
   public static void main(String args[]) throws Exception {
     Configuration conf = NutchConfiguration.create();
     int res = ToolRunner.run(conf, new Crawl(), args);
     System.exit(res);
   }
-  
+
   @Override
   public int run(String[] args) throws Exception {
     if (args.length < 1) {
-      System.out.println
-      ("Usage: Crawl <urlDir> -solr <solrURL> [-dir d] [-threads n] [-depth i] [-topN N]");
+      System.out
+          .println("Usage: Crawl <urlDir> -solr <solrURL> [-dir d] [-threads n] [-depth i] [-topN N]");
       return -1;
     }
     Path rootUrlDir = null;
@@ -69,20 +71,20 @@
     int depth = 5;
     long topN = Long.MAX_VALUE;
     String solrUrl = null;
-    
+
     for (int i = 0; i < args.length; i++) {
       if ("-dir".equals(args[i])) {
-        dir = new Path(args[i+1]);
+        dir = new Path(args[i + 1]);
         i++;
       } else if ("-threads".equals(args[i])) {
-        threads = Integer.parseInt(args[i+1]);
+        threads = Integer.parseInt(args[i + 1]);
         i++;
       } else if ("-depth".equals(args[i])) {
-        depth = Integer.parseInt(args[i+1]);
+        depth = Integer.parseInt(args[i + 1]);
         i++;
       } else if ("-topN".equals(args[i])) {
-          topN = Integer.parseInt(args[i+1]);
-          i++;
+        topN = Integer.parseInt(args[i + 1]);
+        i++;
       } else if ("-solr".equals(args[i])) {
         solrUrl = StringUtils.lowerCase(args[i + 1]);
         i++;
@@ -90,7 +92,7 @@
         rootUrlDir = new Path(args[i]);
       }
     }
-    
+
     JobConf job = new NutchJob(getConf());
 
     if (solrUrl == null) {
@@ -103,39 +105,39 @@
       LOG.info("crawl started in: " + dir);
       LOG.info("rootUrlDir = " + rootUrlDir);
       LOG.info("threads = " + threads);
-      LOG.info("depth = " + depth);      
+      LOG.info("depth = " + depth);
       LOG.info("solrUrl=" + solrUrl);
       if (topN != Long.MAX_VALUE)
         LOG.info("topN = " + topN);
     }
-    
+
     Path crawlDb = new Path(dir + "/crawldb");
     Path linkDb = new Path(dir + "/linkdb");
     Path segments = new Path(dir + "/segments");
     Path indexes = new Path(dir + "/indexes");
     Path index = new Path(dir + "/index");
 
-    Path tmpDir = job.getLocalPath("crawl"+Path.SEPARATOR+getDate());
+    Path tmpDir = job.getLocalPath("crawl" + Path.SEPARATOR + getDate());
     Injector injector = new Injector(getConf());
     Generator generator = new Generator(getConf());
     Fetcher fetcher = new Fetcher(getConf());
     ParseSegment parseSegment = new ParseSegment(getConf());
     CrawlDb crawlDbTool = new CrawlDb(getConf());
     LinkDb linkDbTool = new LinkDb(getConf());
-      
+
     // initialize crawlDb
     injector.inject(crawlDb, rootUrlDir);
     int i;
-    for (i = 0; i < depth; i++) {             // generate new segment
-      Path[] segs = generator.generate(crawlDb, segments, -1, topN, System
-          .currentTimeMillis());
+    for (i = 0; i < depth; i++) { // generate new segment
+      Path[] segs = generator.generate(crawlDb, segments, -1, topN,
+          System.currentTimeMillis());
       if (segs == null) {
         LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
         break;
       }
-      fetcher.fetch(segs[0], threads);  // fetch it
+      fetcher.fetch(segs[0], threads); // fetch it
       if (!Fetcher.isParsing(job)) {
-        parseSegment.parse(segs[0]);    // parse it, if needed
+        parseSegment.parse(segs[0]); // parse it, if needed
       }
       crawlDbTool.update(crawlDb, segs, true, true); // update crawldb
     }
@@ -144,21 +146,23 @@
 
       if (solrUrl != null) {
         // index, dedup & merge
-        FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] fstats = fs.listStatus(segments,
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         SolrIndexer indexer = new SolrIndexer(getConf());
-        indexer.indexSolr(solrUrl, crawlDb, linkDb, 
-          Arrays.asList(HadoopFSUtil.getPaths(fstats)));
+        indexer.indexSolr(solrUrl, crawlDb, linkDb,
+            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
         SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
         dedup.setConf(getConf());
         dedup.dedup(solrUrl);
       }
-      
+
     } else {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");
     }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl finished: " + dir);
+    }
     return 0;
   }
 
-
 }
Index: src/java/org/apache/nutch/crawl/LinkDbFilter.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDbFilter.java	(working copy)
@@ -31,8 +31,8 @@
 import org.apache.nutch.net.URLNormalizers;
 
 /**
- * This class provides a way to separate the URL normalization
- * and filtering steps from the rest of LinkDb manipulation code.
+ * This class provides a way to separate the URL normalization and filtering
+ * steps from the rest of LinkDb manipulation code.
  * 
  * @author Andrzej Bialecki
  */
@@ -50,13 +50,13 @@
   private URLFilters filters;
 
   private URLNormalizers normalizers;
-  
+
   private String scope;
-  
+
   public static final Logger LOG = LoggerFactory.getLogger(LinkDbFilter.class);
 
   private Text newKey = new Text();
-  
+
   public void configure(JobConf job) {
     filter = job.getBoolean(URL_FILTERING, false);
     normalize = job.getBoolean(URL_NORMALIZING, false);
@@ -69,10 +69,12 @@
     }
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public void map(Text key, Inlinks value,
-      OutputCollector<Text, Inlinks> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, Inlinks> output, Reporter reporter)
+      throws IOException {
     String url = key.toString();
     Inlinks result = new Inlinks();
     if (normalize) {
@@ -91,7 +93,8 @@
         url = null;
       }
     }
-    if (url == null) return; // didn't pass the filters
+    if (url == null)
+      return; // didn't pass the filters
     Iterator<Inlink> it = value.iterator();
     String fromUrl = null;
     while (it.hasNext()) {
@@ -113,7 +116,7 @@
           fromUrl = null;
         }
       }
-      if (fromUrl != null) { 
+      if (fromUrl != null) {
         result.add(new Inlink(fromUrl, inlink.getAnchor()));
       }
     }
Index: src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/DefaultFetchSchedule.java	(working copy)
@@ -20,8 +20,8 @@
 import org.apache.hadoop.io.Text;
 
 /**
- * This class implements the default re-fetch schedule. That is, no matter
- * if the page was changed or not, the <code>fetchInterval</code> remains
+ * This class implements the default re-fetch schedule. That is, no matter if
+ * the page was changed or not, the <code>fetchInterval</code> remains
  * unchanged, and the updated page fetchTime will always be set to
  * <code>fetchTime + fetchInterval * 1000</code>.
  * 
@@ -31,14 +31,14 @@
 
   @Override
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state) {
     datum = super.setFetchSchedule(url, datum, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
-    if (datum.getFetchInterval() == 0 ) {
+    if (datum.getFetchInterval() == 0) {
       datum.setFetchInterval(defaultInterval);
     }
-    datum.setFetchTime(fetchTime + (long)datum.getFetchInterval() * 1000);
+    datum.setFetchTime(fetchTime + (long) datum.getFetchInterval() * 1000);
     datum.setModifiedTime(modifiedTime);
     return datum;
   }
Index: src/java/org/apache/nutch/crawl/NutchWritable.java
===================================================================
--- src/java/org/apache/nutch/crawl/NutchWritable.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/NutchWritable.java	(working copy)
@@ -20,36 +20,34 @@
 import org.apache.nutch.util.GenericWritableConfigurable;
 
 public class NutchWritable extends GenericWritableConfigurable {
-  
+
   private static Class<? extends Writable>[] CLASSES = null;
-  
+
   static {
-    CLASSES = new Class[] {
-      org.apache.hadoop.io.NullWritable.class, 
-      org.apache.hadoop.io.LongWritable.class,
-      org.apache.hadoop.io.BytesWritable.class,
-      org.apache.hadoop.io.FloatWritable.class,
-      org.apache.hadoop.io.IntWritable.class,
-      org.apache.hadoop.io.Text.class,
-      org.apache.hadoop.io.MD5Hash.class,
-      org.apache.nutch.crawl.CrawlDatum.class,
-      org.apache.nutch.crawl.Inlink.class,
-      org.apache.nutch.crawl.Inlinks.class,
-      org.apache.nutch.crawl.MapWritable.class,
-      org.apache.nutch.fetcher.FetcherOutput.class,
-      org.apache.nutch.metadata.Metadata.class,
-      org.apache.nutch.parse.Outlink.class,
-      org.apache.nutch.parse.ParseText.class,
-      org.apache.nutch.parse.ParseData.class,
-      org.apache.nutch.parse.ParseImpl.class,
-      org.apache.nutch.parse.ParseStatus.class,
-      org.apache.nutch.protocol.Content.class,
-      org.apache.nutch.protocol.ProtocolStatus.class,
-    };
+    CLASSES = new Class[] { org.apache.hadoop.io.NullWritable.class,
+        org.apache.hadoop.io.LongWritable.class,
+        org.apache.hadoop.io.BytesWritable.class,
+        org.apache.hadoop.io.FloatWritable.class,
+        org.apache.hadoop.io.IntWritable.class,
+        org.apache.hadoop.io.Text.class, org.apache.hadoop.io.MD5Hash.class,
+        org.apache.nutch.crawl.CrawlDatum.class,
+        org.apache.nutch.crawl.Inlink.class,
+        org.apache.nutch.crawl.Inlinks.class,
+        org.apache.nutch.crawl.MapWritable.class,
+        org.apache.nutch.fetcher.FetcherOutput.class,
+        org.apache.nutch.metadata.Metadata.class,
+        org.apache.nutch.parse.Outlink.class,
+        org.apache.nutch.parse.ParseText.class,
+        org.apache.nutch.parse.ParseData.class,
+        org.apache.nutch.parse.ParseImpl.class,
+        org.apache.nutch.parse.ParseStatus.class,
+        org.apache.nutch.protocol.Content.class,
+        org.apache.nutch.protocol.ProtocolStatus.class, };
   }
 
-  public NutchWritable() { }
-  
+  public NutchWritable() {
+  }
+
   public NutchWritable(Writable instance) {
     set(instance);
   }
Index: src/java/org/apache/nutch/crawl/Injector.java
===================================================================
--- src/java/org/apache/nutch/crawl/Injector.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Injector.java	(working copy)
@@ -38,25 +38,31 @@
 import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.util.TimingUtil;
 
-/** This class takes a flat file of URLs and adds them to the of pages to be
- * crawled.  Useful for bootstrapping the system. 
- * The URL files contain one URL per line, optionally followed by custom metadata 
- * separated by tabs with the metadata key separated from the corresponding value by '='. <br>
+/**
+ * This class takes a flat file of URLs and adds them to the list of pages to be
+ * crawled. Useful for bootstrapping the system. The URL files contain one URL
+ * per line, optionally followed by custom metadata separated by tabs with the
+ * metadata key separated from the corresponding value by '='. <br>
  * Note that some metadata keys are reserved : <br>
  * - <i>nutch.score</i> : allows to set a custom score for a specific URL <br>
- * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a specific URL <br>
- * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000 \t userType=open_source
+ * - <i>nutch.fetchInterval</i> : allows to set a custom fetch interval for a
+ * specific URL <br>
+ * e.g. http://www.nutch.org/ \t nutch.score=10 \t nutch.fetchInterval=2592000
+ * \t userType=open_source
  **/
 public class Injector extends Configured implements Tool {
   public static final Logger LOG = LoggerFactory.getLogger(Injector.class);
-  
+
   /** metadata key reserved for setting a custom score for a specific URL */
   public static String nutchScoreMDName = "nutch.score";
-  /** metadata key reserved for setting a custom fetchInterval for a specific URL */
+  /**
+   * metadata key reserved for setting a custom fetchInterval for a specific URL
+   */
   public static String nutchFetchIntervalMDName = "nutch.fetchInterval";
 
   /** Normalize and filter injected urls. */
-  public static class InjectMapper implements Mapper<WritableComparable, Text, Text, CrawlDatum> {
+  public static class InjectMapper implements
+      Mapper<WritableComparable, Text, Text, CrawlDatum> {
     private URLNormalizers urlNormalizers;
     private int interval;
     private float scoreInjected;
@@ -72,78 +78,86 @@
       filters = new URLFilters(jobConf);
       scfilters = new ScoringFilters(jobConf);
       scoreInjected = jobConf.getFloat("db.score.injected", 1.0f);
-      curTime = job.getLong("injector.current.time", System.currentTimeMillis());
+      curTime = job
+          .getLong("injector.current.time", System.currentTimeMillis());
     }
 
-    public void close() {}
+    public void close() {
+    }
 
     public void map(WritableComparable key, Text value,
-                    OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-      throws IOException {
-      String url = value.toString();              // value is line of text
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
+      String url = value.toString(); // value is line of text
 
       if (url != null && url.trim().startsWith("#")) {
-          /* Ignore line that start with # */
-          return;
+        /* Ignore line that start with # */
+        return;
       }
 
       // if tabs : metadata that could be stored
       // must be name=value and separated by \t
       float customScore = -1f;
       int customInterval = interval;
-      Map<String,String> metadata = new TreeMap<String,String>();
-      if (url.indexOf("\t")!=-1){
-    	  String[] splits = url.split("\t");
-    	  url = splits[0];
-    	  for (int s=1;s<splits.length;s++){
-    		  // find separation between name and value
-    		  int indexEquals = splits[s].indexOf("=");
-    		  if (indexEquals==-1) {
-    			  // skip anything without a =
-    			  continue;		    
-    		  }
-    		  String metaname = splits[s].substring(0, indexEquals);
-    		  String metavalue = splits[s].substring(indexEquals+1);
-    		  if (metaname.equals(nutchScoreMDName)) {
-    			  try {
-    			  customScore = Float.parseFloat(metavalue);}
-    			  catch (NumberFormatException nfe){}
-    		  }
-    		  else if (metaname.equals(nutchFetchIntervalMDName)) {
-    			  try {
-    				  customInterval = Integer.parseInt(metavalue);}
-    			  catch (NumberFormatException nfe){}
-    		  }
-    		  else metadata.put(metaname,metavalue);
-    	  }
+      Map<String, String> metadata = new TreeMap<String, String>();
+      if (url.indexOf("\t") != -1) {
+        String[] splits = url.split("\t");
+        url = splits[0];
+        for (int s = 1; s < splits.length; s++) {
+          // find separation between name and value
+          int indexEquals = splits[s].indexOf("=");
+          if (indexEquals == -1) {
+            // skip anything without a =
+            continue;
+          }
+          String metaname = splits[s].substring(0, indexEquals);
+          String metavalue = splits[s].substring(indexEquals + 1);
+          if (metaname.equals(nutchScoreMDName)) {
+            try {
+              customScore = Float.parseFloat(metavalue);
+            } catch (NumberFormatException nfe) {
+            }
+          } else if (metaname.equals(nutchFetchIntervalMDName)) {
+            try {
+              customInterval = Integer.parseInt(metavalue);
+            } catch (NumberFormatException nfe) {
+            }
+          } else
+            metadata.put(metaname, metavalue);
+        }
       }
       try {
         url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
-        url = filters.filter(url);             // filter the url
+        url = filters.filter(url); // filter the url
       } catch (Exception e) {
-        if (LOG.isWarnEnabled()) { LOG.warn("Skipping " +url+":"+e); }
+        if (LOG.isWarnEnabled()) {
+          LOG.warn("Skipping " + url + ":" + e);
+        }
         url = null;
       }
-      if (url != null) {                          // if it passes
-        value.set(url);                           // collect it
-        CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED, customInterval);
+      if (url != null) { // if it passes
+        value.set(url); // collect it
+        CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED,
+            customInterval);
         datum.setFetchTime(curTime);
         // now add the metadata
         Iterator<String> keysIter = metadata.keySet().iterator();
-        while (keysIter.hasNext()){
-        	String keymd = keysIter.next();
-        	String valuemd = metadata.get(keymd);
-        	datum.getMetaData().put(new Text(keymd), new Text(valuemd));
+        while (keysIter.hasNext()) {
+          String keymd = keysIter.next();
+          String valuemd = metadata.get(keymd);
+          datum.getMetaData().put(new Text(keymd), new Text(valuemd));
         }
-        if (customScore != -1) datum.setScore(customScore);
-        else datum.setScore(scoreInjected);
+        if (customScore != -1)
+          datum.setScore(customScore);
+        else
+          datum.setScore(scoreInjected);
         try {
-        	scfilters.injectedScore(value, datum);
+          scfilters.injectedScore(value, datum);
         } catch (ScoringFilterException e) {
-        	if (LOG.isWarnEnabled()) {
-        		LOG.warn("Cannot filter injected score for url " + url
-        				+ ", using default (" + e.getMessage() + ")");
-        	}
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Cannot filter injected score for url " + url
+                + ", using default (" + e.getMessage() + ")");
+          }
         }
         output.collect(value, datum);
       }
@@ -151,16 +165,20 @@
   }
 
   /** Combine multiple new entries for a url. */
-  public static class InjectReducer implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
-    public void configure(JobConf job) {}    
-    public void close() {}
+  public static class InjectReducer implements
+      Reducer<Text, CrawlDatum, Text, CrawlDatum> {
+    public void configure(JobConf job) {
+    }
 
+    public void close() {
+    }
+
     private CrawlDatum old = new CrawlDatum();
     private CrawlDatum injected = new CrawlDatum();
-    
+
     public void reduce(Text key, Iterator<CrawlDatum> values,
-                       OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       boolean oldSet = false;
       while (values.hasNext()) {
         CrawlDatum val = values.next();
@@ -173,19 +191,22 @@
         }
       }
       CrawlDatum res = null;
-      if (oldSet) res = old; // don't overwrite existing value
-      else res = injected;
+      if (oldSet)
+        res = old; // don't overwrite existing value
+      else
+        res = injected;
 
       output.collect(key, res);
     }
   }
 
-  public Injector() {}
-  
+  public Injector() {
+  }
+
   public Injector(Configuration conf) {
     setConf(conf);
   }
-  
+
   public void inject(Path crawlDb, Path urlDir) throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -195,10 +216,9 @@
       LOG.info("Injector: urlDir: " + urlDir);
     }
 
-    Path tempDir =
-      new Path(getConf().get("mapred.temp.dir", ".") +
-               "/inject-temp-"+
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path tempDir = new Path(getConf().get("mapred.temp.dir", ".")
+        + "/inject-temp-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     // map text input file to a <url,CrawlDatum> file
     if (LOG.isInfoEnabled()) {
@@ -231,14 +251,15 @@
     fs.delete(tempDir, true);
 
     long end = System.currentTimeMillis();
-    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Injector(), args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
       System.err.println("Usage: Injector <crawldb> <url_dir>");
Index: src/java/org/apache/nutch/crawl/CrawlDb.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDb.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDb.java	(working copy)
@@ -38,8 +38,8 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This class takes the output of the fetcher and updates the
- * crawldb accordingly.
+ * This class takes the output of the fetcher and updates the crawldb
+ * accordingly.
  */
 public class CrawlDb extends Configured implements Tool {
   public static final Logger LOG = LoggerFactory.getLogger(CrawlDb.class);
@@ -49,21 +49,26 @@
   public static final String CRAWLDB_PURGE_404 = "db.update.purge.404";
 
   public static final String CURRENT_NAME = "current";
-  
+
   public static final String LOCK_NAME = ".locked";
-  
-  public CrawlDb() {}
-  
+
+  public CrawlDb() {
+  }
+
   public CrawlDb(Configuration conf) {
     setConf(conf);
   }
 
-  public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) throws IOException {
-    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
+  public void update(Path crawlDb, Path[] segments, boolean normalize,
+      boolean filter) throws IOException {
+    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED,
+        true);
     update(crawlDb, segments, normalize, filter, additionsAllowed, false);
   }
-  
-  public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) throws IOException {
+
+  public void update(Path crawlDb, Path[] segments, boolean normalize,
+      boolean filter, boolean additionsAllowed, boolean force)
+      throws IOException {
     FileSystem fs = FileSystem.get(getConf());
     Path lock = new Path(crawlDb, LOCK_NAME);
     LockUtil.createLockFile(fs, lock, force);
@@ -106,25 +111,25 @@
     } catch (IOException e) {
       LockUtil.removeLockFile(fs, lock);
       Path outPath = FileOutputFormat.getOutputPath(job);
-      if (fs.exists(outPath) ) fs.delete(outPath, true);
+      if (fs.exists(outPath))
+        fs.delete(outPath, true);
       throw e;
     }
 
     CrawlDb.install(job, crawlDb);
     long end = System.currentTimeMillis();
-    LOG.info("CrawlDb update: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("CrawlDb update: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public static JobConf createJob(Configuration config, Path crawlDb)
-    throws IOException {
-    Path newCrawlDb =
-      new Path(crawlDb,
-               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+      throws IOException {
+    Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random()
+        .nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(config);
     job.setJobName("crawldb " + crawlDb);
 
-
     Path current = new Path(crawlDb, CURRENT_NAME);
     if (FileSystem.get(job).exists(current)) {
       FileInputFormat.addInputPath(job, current);
@@ -151,12 +156,14 @@
     Path old = new Path(crawlDb, "old");
     Path current = new Path(crawlDb, CURRENT_NAME);
     if (fs.exists(current)) {
-      if (fs.exists(old)) fs.delete(old, true);
+      if (fs.exists(old))
+        fs.delete(old, true);
       fs.rename(current, old);
     }
     fs.mkdirs(crawlDb);
     fs.rename(newCrawlDb, current);
-    if (fs.exists(old)) fs.delete(old, true);
+    if (fs.exists(old))
+      fs.delete(old, true);
     Path lock = new Path(crawlDb, LOCK_NAME);
     LockUtil.removeLockFile(fs, lock);
   }
@@ -168,14 +175,21 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: CrawlDb <crawldb> (-dir <segments> | <seg1> <seg2> ...) [-force] [-normalize] [-filter] [-noAdditions]");
+      System.err
+          .println("Usage: CrawlDb <crawldb> (-dir <segments> | <seg1> <seg2> ...) [-force] [-normalize] [-filter] [-noAdditions]");
       System.err.println("\tcrawldb\tCrawlDb to update");
-      System.err.println("\t-dir segments\tparent directory containing all segments to update from");
-      System.err.println("\tseg1 seg2 ...\tlist of segment names to update from");
-      System.err.println("\t-force\tforce update even if CrawlDb appears to be locked (CAUTION advised)");
-      System.err.println("\t-normalize\tuse URLNormalizer on urls in CrawlDb and segment (usually not needed)");
-      System.err.println("\t-filter\tuse URLFilters on urls in CrawlDb and segment");
-      System.err.println("\t-noAdditions\tonly update already existing URLs, don't add any newly discovered URLs");
+      System.err
+          .println("\t-dir segments\tparent directory containing all segments to update from");
+      System.err
+          .println("\tseg1 seg2 ...\tlist of segment names to update from");
+      System.err
+          .println("\t-force\tforce update even if CrawlDb appears to be locked (CAUTION advised)");
+      System.err
+          .println("\t-normalize\tuse URLNormalizer on urls in CrawlDb and segment (usually not needed)");
+      System.err
+          .println("\t-filter\tuse URLFilters on urls in CrawlDb and segment");
+      System.err
+          .println("\t-noAdditions\tonly update already existing URLs, don't add any newly discovered URLs");
 
       return -1;
     }
@@ -184,7 +198,8 @@
     boolean force = false;
     boolean url404Purging = false;
     final FileSystem fs = FileSystem.get(getConf());
-    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED, true);
+    boolean additionsAllowed = getConf().getBoolean(CRAWLDB_ADDITIONS_ALLOWED,
+        true);
     HashSet<Path> dirs = new HashSet<Path>();
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-normalize")) {
@@ -196,14 +211,16 @@
       } else if (args[i].equals("-noAdditions")) {
         additionsAllowed = false;
       } else if (args[i].equals("-dir")) {
-        FileStatus[] paths = fs.listStatus(new Path(args[++i]), HadoopFSUtil.getPassDirectoriesFilter(fs));
+        FileStatus[] paths = fs.listStatus(new Path(args[++i]),
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         dirs.addAll(Arrays.asList(HadoopFSUtil.getPaths(paths)));
       } else {
         dirs.add(new Path(args[i]));
       }
     }
     try {
-      update(new Path(args[0]), dirs.toArray(new Path[dirs.size()]), normalize, filter, additionsAllowed, force);
+      update(new Path(args[0]), dirs.toArray(new Path[dirs.size()]), normalize,
+          filter, additionsAllowed, force);
       return 0;
     } catch (Exception e) {
       LOG.error("CrawlDb update: " + StringUtils.stringifyException(e));
Index: src/java/org/apache/nutch/crawl/CrawlDbMerger.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbMerger.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbMerger.java	(working copy)
@@ -39,36 +39,42 @@
 import org.apache.nutch.util.TimingUtil;
 
 /**
- * This tool merges several CrawlDb-s into one, optionally filtering
- * URLs through the current URLFilters, to skip prohibited
- * pages.
+ * This tool merges several CrawlDb-s into one, optionally filtering URLs
+ * through the current URLFilters, to skip prohibited pages.
  * 
- * <p>It's possible to use this tool just for filtering - in that case
- * only one CrawlDb should be specified in arguments.</p>
- * <p>If more than one CrawlDb contains information about the same URL,
- * only the most recent version is retained, as determined by the
- * value of {@link org.apache.nutch.crawl.CrawlDatum#getFetchTime()}.
- * However, all metadata information from all versions is accumulated,
- * with newer values taking precedence over older values.
+ * <p>
+ * It's possible to use this tool just for filtering - in that case only one
+ * CrawlDb should be specified in arguments.
+ * </p>
+ * <p>
+ * If more than one CrawlDb contains information about the same URL, only the
+ * most recent version is retained, as determined by the value of
+ * {@link org.apache.nutch.crawl.CrawlDatum#getFetchTime()}. However, all
+ * metadata information from all versions is accumulated, with newer values
+ * taking precedence over older values.
  * 
  * @author Andrzej Bialecki
  */
 public class CrawlDbMerger extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(CrawlDbMerger.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDbMerger.class);
 
-  public static class Merger extends MapReduceBase implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
+  public static class Merger extends MapReduceBase implements
+      Reducer<Text, CrawlDatum, Text, CrawlDatum> {
     private org.apache.hadoop.io.MapWritable meta;
     private CrawlDatum res = new CrawlDatum();
     private FetchSchedule schedule;
 
-    public void close() throws IOException {}
+    public void close() throws IOException {
+    }
 
     public void configure(JobConf conf) {
       schedule = FetchScheduleFactory.getFetchSchedule(conf);
     }
 
-    public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-            throws IOException {
+    public void reduce(Text key, Iterator<CrawlDatum> values,
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       long resTime = 0L;
       boolean resSet = false;
       meta = new org.apache.hadoop.io.MapWritable();
@@ -91,7 +97,7 @@
             meta.put(e.getKey(), e.getValue());
           }
           res.set(val);
-          resTime = valTime ;
+          resTime = valTime;
         } else {
           // insert older metadata before newer
           for (Entry<Writable, Writable> e : meta.entrySet()) {
@@ -104,35 +110,42 @@
       output.collect(key, res);
     }
   }
-  
+
   public CrawlDbMerger() {
-    
+
   }
-  
+
   public CrawlDbMerger(Configuration conf) {
     setConf(conf);
   }
 
-  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
+  public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
+      throws Exception {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("CrawlDb merge: starting at " + sdf.format(start));
 
     JobConf job = createMergeJob(getConf(), output, normalize, filter);
     for (int i = 0; i < dbs.length; i++) {
-      if (LOG.isInfoEnabled()) { LOG.info("Adding " + dbs[i]); }
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Adding " + dbs[i]);
+      }
       FileInputFormat.addInputPath(job, new Path(dbs[i], CrawlDb.CURRENT_NAME));
     }
     JobClient.runJob(job);
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
-    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, CrawlDb.CURRENT_NAME));
+    fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
+        CrawlDb.CURRENT_NAME));
     long end = System.currentTimeMillis();
-    LOG.info("CrawlDb merge: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("CrawlDb merge: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static JobConf createMergeJob(Configuration conf, Path output, boolean normalize, boolean filter) {
-    Path newCrawlDb = new Path("crawldb-merge-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+  public static JobConf createMergeJob(Configuration conf, Path output,
+      boolean normalize, boolean filter) {
+    Path newCrawlDb = new Path("crawldb-merge-"
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     JobConf job = new NutchJob(conf);
     job.setJobName("crawldb merge " + output);
@@ -156,16 +169,20 @@
    * @param args
    */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDbMerger(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new CrawlDbMerger(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: CrawlDbMerger <output_crawldb> <crawldb1> [<crawldb2> <crawldb3> ...] [-normalize] [-filter]");
+      System.err
+          .println("Usage: CrawlDbMerger <output_crawldb> <crawldb1> [<crawldb2> <crawldb3> ...] [-normalize] [-filter]");
       System.err.println("\toutput_crawldb\toutput CrawlDb");
-      System.err.println("\tcrawldb1 ...\tinput CrawlDb-s (single input CrawlDb is ok)");
-      System.err.println("\t-normalize\tuse URLNormalizer on urls in the crawldb(s) (usually not needed)");
+      System.err
+          .println("\tcrawldb1 ...\tinput CrawlDb-s (single input CrawlDb is ok)");
+      System.err
+          .println("\t-normalize\tuse URLNormalizer on urls in the crawldb(s) (usually not needed)");
       System.err.println("\t-filter\tuse URLFilters on urls in the crawldb(s)");
       return -1;
     }
@@ -183,8 +200,8 @@
         continue;
       }
       final Path dbPath = new Path(args[i]);
-      if(fs.exists(dbPath))
-       dbs.add(dbPath);
+      if (fs.exists(dbPath))
+        dbs.add(dbPath);
     }
     try {
       merge(output, dbs.toArray(new Path[dbs.size()]), normalize, filter);
Index: src/java/org/apache/nutch/crawl/URLPartitioner.java
===================================================================
--- src/java/org/apache/nutch/crawl/URLPartitioner.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/URLPartitioner.java	(working copy)
@@ -33,8 +33,9 @@
  * Partition urls by host, domain name or IP depending on the value of the
  * parameter 'partition.url.mode' which can be 'byHost', 'byDomain' or 'byIP'
  */
-public class URLPartitioner implements Partitioner<Text,Writable> {
-  private static final Logger LOG = LoggerFactory.getLogger(URLPartitioner.class);
+public class URLPartitioner implements Partitioner<Text, Writable> {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(URLPartitioner.class);
 
   public static final String PARTITION_MODE_KEY = "partition.url.mode";
 
@@ -58,7 +59,8 @@
     normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_PARTITION);
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   /** Hash by domain name. */
   public int getPartition(Text key, Writable value, int numReduceTasks) {
@@ -66,15 +68,16 @@
     URL url = null;
     int hashCode = urlString.hashCode();
     try {
-      urlString = normalizers.normalize(urlString, URLNormalizers.SCOPE_PARTITION);
+      urlString = normalizers.normalize(urlString,
+          URLNormalizers.SCOPE_PARTITION);
       url = new URL(urlString);
       hashCode = url.getHost().hashCode();
     } catch (MalformedURLException e) {
       LOG.warn("Malformed URL: '" + urlString + "'");
     }
 
-    if (mode.equals(PARTITION_MODE_DOMAIN) && url != null) hashCode = URLUtil
-        .getDomainName(url).hashCode();
+    if (mode.equals(PARTITION_MODE_DOMAIN) && url != null)
+      hashCode = URLUtil.getDomainName(url).hashCode();
     else if (mode.equals(PARTITION_MODE_IP)) {
       try {
         InetAddress address = InetAddress.getByName(url.getHost());
Index: src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/AdaptiveFetchSchedule.java	(working copy)
@@ -32,11 +32,12 @@
  * If SYNC_DELTA property is true, then:
  * <ul>
  * <li>calculate a <code>delta = fetchTime - modifiedTime</code></li>
- * <li>try to synchronize with the time of change, by shifting the next fetchTime
- * by a fraction of the difference between the last modification time and the last
- * fetch time. I.e. the next fetch time will be set to
+ * <li>try to synchronize with the time of change, by shifting the next
+ * fetchTime by a fraction of the difference between the last modification time
+ * and the last fetch time. I.e. the next fetch time will be set to
  * <code>fetchTime + fetchInterval - delta * SYNC_DELTA_RATE</code></li>
- * <li>if the adjusted fetch interval is bigger than the delta, then <code>fetchInterval = delta</code>.</li>
+ * <li>if the adjusted fetch interval is bigger than the delta, then
+ * <code>fetchInterval = delta</code>.</li>
  * </ul>
  * </li>
  * <li>the minimum value of fetchInterval may not be smaller than MIN_INTERVAL
@@ -44,10 +45,13 @@
  * <li>the maximum value of fetchInterval may not be bigger than MAX_INTERVAL
  * (default is 365 days).</li>
  * </ul>
- * <p>NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize the algorithm,
- * so that the fetch interval either increases or decreases infinitely, with little
- * relevance to the page changes. Please use {@link #main(String[])} method to
- * test the values before applying them in a production system.</p>
+ * <p>
+ * NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize
+ * the algorithm, so that the fetch interval either increases or decreases
+ * infinitely, with little relevance to the page changes. Please use the
+ * {@link #main(String[])} method to test the values before applying them in a
+ * production system.
+ * </p>
  * 
  * @author Andrzej Bialecki
  */
@@ -60,45 +64,50 @@
   private int MAX_INTERVAL;
 
   private int MIN_INTERVAL;
-  
+
   private boolean SYNC_DELTA;
 
   private double SYNC_DELTA_RATE;
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
     DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
     MIN_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.min_interval", 60);
-    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval", SECONDS_PER_DAY * 365 ); // 1 year
+    MAX_INTERVAL = conf.getInt("db.fetch.schedule.adaptive.max_interval",
+        SECONDS_PER_DAY * 365); // 1 year
     SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true);
-    SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
+    SYNC_DELTA_RATE = conf.getFloat(
+        "db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);
   }
 
   @Override
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state) {
     super.setFetchSchedule(url, datum, prevFetchTime, prevModifiedTime,
         fetchTime, modifiedTime, state);
     long refTime = fetchTime;
-    if (modifiedTime <= 0) modifiedTime = fetchTime;
+    if (modifiedTime <= 0)
+      modifiedTime = fetchTime;
     float interval = datum.getFetchInterval();
     switch (state) {
-      case FetchSchedule.STATUS_MODIFIED:
-        interval *= (1.0f - DEC_RATE);
-        break;
-      case FetchSchedule.STATUS_NOTMODIFIED:
-        interval *= (1.0f + INC_RATE);
-        break;
-      case FetchSchedule.STATUS_UNKNOWN:
-        break;
+    case FetchSchedule.STATUS_MODIFIED:
+      interval *= (1.0f - DEC_RATE);
+      break;
+    case FetchSchedule.STATUS_NOTMODIFIED:
+      interval *= (1.0f + INC_RATE);
+      break;
+    case FetchSchedule.STATUS_UNKNOWN:
+      break;
     }
     if (SYNC_DELTA) {
       // try to synchronize with the time of change
       long delta = (fetchTime - modifiedTime) / 1000L;
-      if (delta > interval) interval = delta;
+      if (delta > interval)
+        interval = delta;
       refTime = fetchTime - Math.round(delta * SYNC_DELTA_RATE * 1000);
     }
     if (interval < MIN_INTERVAL) {
@@ -134,30 +143,39 @@
     // let's move the timeline a couple of deltas
     for (int i = 0; i < 10000; i++) {
       if (lastModified + update < curTime) {
-        //System.out.println("i=" + i + ", lastModified=" + lastModified + ", update=" + update + ", curTime=" + curTime);
+        // System.out.println("i=" + i + ", lastModified=" + lastModified +
+        // ", update=" + update + ", curTime=" + curTime);
         changed = true;
         changeCnt++;
         lastModified = curTime;
       }
-      System.out.println(i + ". " + changed + "\twill fetch at " + (p.getFetchTime() / delta) + "\tinterval "
-              + (p.getFetchInterval() / SECONDS_PER_DAY ) + " days" + "\t missed " + miss);
+      System.out.println(i + ". " + changed + "\twill fetch at "
+          + (p.getFetchTime() / delta) + "\tinterval "
+          + (p.getFetchInterval() / SECONDS_PER_DAY) + " days" + "\t missed "
+          + miss);
       if (p.getFetchTime() <= curTime) {
         fetchCnt++;
-        fs.setFetchSchedule(new Text("http://www.example.com"), p,
-                p.getFetchTime(), p.getModifiedTime(), curTime, lastModified,
-                changed ? FetchSchedule.STATUS_MODIFIED : FetchSchedule.STATUS_NOTMODIFIED);
-        System.out.println("\tfetched & adjusted: " + "\twill fetch at " + (p.getFetchTime() / delta) + "\tinterval "
-                + (p.getFetchInterval() / SECONDS_PER_DAY ) + " days");
-        if (!changed) miss++;
-        if (miss > maxMiss) maxMiss = miss;
+        fs.setFetchSchedule(new Text("http://www.example.com"), p, p
+            .getFetchTime(), p.getModifiedTime(), curTime, lastModified,
+            changed ? FetchSchedule.STATUS_MODIFIED
+                : FetchSchedule.STATUS_NOTMODIFIED);
+        System.out.println("\tfetched & adjusted: " + "\twill fetch at "
+            + (p.getFetchTime() / delta) + "\tinterval "
+            + (p.getFetchInterval() / SECONDS_PER_DAY) + " days");
+        if (!changed)
+          miss++;
+        if (miss > maxMiss)
+          maxMiss = miss;
         changed = false;
         totalMiss += miss;
         miss = 0;
       }
-      if (changed) miss++;
+      if (changed)
+        miss++;
       curTime += delta;
     }
     System.out.println("Total missed: " + totalMiss + ", max miss: " + maxMiss);
-    System.out.println("Page changed " + changeCnt + " times, fetched " + fetchCnt + " times.");
+    System.out.println("Page changed " + changeCnt + " times, fetched "
+        + fetchCnt + " times.");
   }
 }
Index: src/java/org/apache/nutch/crawl/Inlink.java
===================================================================
--- src/java/org/apache/nutch/crawl/Inlink.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Inlink.java	(working copy)
@@ -26,7 +26,8 @@
   private String fromUrl;
   private String anchor;
 
-  public Inlink() {}
+  public Inlink() {
+  }
 
   public Inlink(String fromUrl, String anchor) {
     this.fromUrl = fromUrl;
@@ -40,8 +41,8 @@
 
   /** Skips over one Inlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip fromUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip fromUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -55,16 +56,20 @@
     return inlink;
   }
 
-  public String getFromUrl() { return fromUrl; }
-  public String getAnchor() { return anchor; }
+  public String getFromUrl() {
+    return fromUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
+
   public boolean equals(Object o) {
     if (!(o instanceof Inlink))
       return false;
-    Inlink other = (Inlink)o;
-    return
-      this.fromUrl.equals(other.fromUrl) &&
-      this.anchor.equals(other.anchor);
+    Inlink other = (Inlink) o;
+    return this.fromUrl.equals(other.fromUrl)
+        && this.anchor.equals(other.anchor);
   }
 
   public int hashCode() {
Index: src/java/org/apache/nutch/crawl/CrawlDbFilter.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbFilter.java	(working copy)
@@ -30,12 +30,13 @@
 import org.apache.nutch.net.URLNormalizers;
 
 /**
- * This class provides a way to separate the URL normalization
- * and filtering steps from the rest of CrawlDb manipulation code.
+ * This class provides a way to separate the URL normalization and filtering
+ * steps from the rest of CrawlDb manipulation code.
  * 
  * @author Andrzej Bialecki
  */
-public class CrawlDbFilter implements Mapper<Text, CrawlDatum, Text, CrawlDatum> {
+public class CrawlDbFilter implements
+    Mapper<Text, CrawlDatum, Text, CrawlDatum> {
   public static final String URL_FILTERING = "crawldb.url.filters";
 
   public static final String URL_NORMALIZING = "crawldb.url.normalizers";
@@ -51,7 +52,7 @@
   private URLFilters filters;
 
   private URLNormalizers normalizers;
-  
+
   private String scope;
 
   public static final Logger LOG = LoggerFactory.getLogger(CrawlDbFilter.class);
@@ -70,17 +71,19 @@
     }
   }
 
-  public void close() {}
-  
+  public void close() {
+  }
+
   private Text newKey = new Text();
 
   public void map(Text key, CrawlDatum value,
-      OutputCollector<Text, CrawlDatum> output,
-      Reporter reporter) throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     String url = key.toString();
 
-    // https://issues.apache.org/jira/browse/NUTCH-1101 check status first, cheaper than normalizing or filtering
+    // https://issues.apache.org/jira/browse/NUTCH-1101 check status first,
+    // cheaper than normalizing or filtering
     if (url404Purging && CrawlDatum.STATUS_DB_GONE == value.getStatus()) {
       url = null;
     }
Index: src/java/org/apache/nutch/crawl/Generator.java
===================================================================
--- src/java/org/apache/nutch/crawl/Generator.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/Generator.java	(working copy)
@@ -51,9 +51,9 @@
  * Generates a subset of a crawl db to fetch. This version allows to generate
  * fetchlists for several segments in one go. Unlike in the initial version
  * (OldGenerator), the IP resolution is done ONLY on the entries which have been
- * selected for fetching. The URLs are partitioned by IP, domain or host within a 
- * segment. We can chose separately how to count the URLS i.e. by domain or host
- * to limit the entries.
+ * selected for fetching. The URLs are partitioned by IP, domain or host within
+ * a segment. We can choose separately how to count the URLs, i.e. by domain or
+ * host, to limit the entries.
  **/
 public class Generator extends Configured implements Tool {
 
@@ -71,8 +71,8 @@
   public static final String GENERATOR_CUR_TIME = "generate.curTime";
   public static final String GENERATOR_DELAY = "crawl.gen.delay";
   public static final String GENERATOR_MAX_NUM_SEGMENTS = "generate.max.num.segments";
-  
-  // deprecated parameters 
+
+  // deprecated parameters
   public static final String GENERATE_MAX_PER_HOST_BY_IP = "generate.max.per.host.by.ip";
   public static final String GENERATE_MAX_PER_HOST = "generate.max.per.host";
 
@@ -100,25 +100,25 @@
     }
 
     public String toString() {
-      return "url=" + url.toString() + ", datum=" + datum.toString() + ", segnum="
-          + segnum.toString();
+      return "url=" + url.toString() + ", datum=" + datum.toString()
+          + ", segnum=" + segnum.toString();
     }
   }
 
   /** Selects entries due for fetch. */
   public static class Selector implements
-      Mapper<Text,CrawlDatum,FloatWritable,SelectorEntry>,
-      Partitioner<FloatWritable,Writable>,
-      Reducer<FloatWritable,SelectorEntry,FloatWritable,SelectorEntry> {
+      Mapper<Text, CrawlDatum, FloatWritable, SelectorEntry>,
+      Partitioner<FloatWritable, Writable>,
+      Reducer<FloatWritable, SelectorEntry, FloatWritable, SelectorEntry> {
     private LongWritable genTime = new LongWritable(System.currentTimeMillis());
     private long curTime;
     private long limit;
     private long count;
-    private HashMap<String,int[]> hostCounts = new HashMap<String,int[]>();
+    private HashMap<String, int[]> hostCounts = new HashMap<String, int[]>();
     private int segCounts[];
     private int maxCount;
     private boolean byDomain = false;
-    private Partitioner<Text,Writable> partitioner = new URLPartitioner();
+    private Partitioner<Text, Writable> partitioner = new URLPartitioner();
     private URLFilters filters;
     private URLNormalizers normalizers;
     private ScoringFilters scfilters;
@@ -134,46 +134,53 @@
 
     public void configure(JobConf job) {
       curTime = job.getLong(GENERATOR_CUR_TIME, System.currentTimeMillis());
-      limit = job.getLong(GENERATOR_TOP_N, Long.MAX_VALUE) / job.getNumReduceTasks();
+      limit = job.getLong(GENERATOR_TOP_N, Long.MAX_VALUE)
+          / job.getNumReduceTasks();
       maxCount = job.getInt(GENERATOR_MAX_COUNT, -1);
       // back compatibility with old param
       int oldMaxPerHost = job.getInt(GENERATE_MAX_PER_HOST, -1);
-      if (maxCount==-1 && oldMaxPerHost!=-1){
+      if (maxCount == -1 && oldMaxPerHost != -1) {
         maxCount = oldMaxPerHost;
         byDomain = false;
       }
-      if (GENERATOR_COUNT_VALUE_DOMAIN.equals(job.get(GENERATOR_COUNT_MODE))) byDomain = true;
+      if (GENERATOR_COUNT_VALUE_DOMAIN.equals(job.get(GENERATOR_COUNT_MODE)))
+        byDomain = true;
       filters = new URLFilters(job);
       normalise = job.getBoolean(GENERATOR_NORMALISE, true);
-      if (normalise) normalizers = new URLNormalizers(job,
-          URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
+      if (normalise)
+        normalizers = new URLNormalizers(job,
+            URLNormalizers.SCOPE_GENERATE_HOST_COUNT);
       scfilters = new ScoringFilters(job);
       partitioner.configure(job);
       filter = job.getBoolean(GENERATOR_FILTER, true);
       genDelay = job.getLong(GENERATOR_DELAY, 7L) * 3600L * 24L * 1000L;
       long time = job.getLong(Nutch.GENERATE_TIME_KEY, 0L);
-      if (time > 0) genTime.set(time);
+      if (time > 0)
+        genTime.set(time);
       schedule = FetchScheduleFactory.getFetchSchedule(job);
       scoreThreshold = job.getFloat(GENERATOR_MIN_SCORE, Float.NaN);
       maxNumSegments = job.getInt(GENERATOR_MAX_NUM_SEGMENTS, 1);
       segCounts = new int[maxNumSegments];
     }
 
-    public void close() {}
+    public void close() {
+    }
 
     /** Select & invert subset due for fetch. */
     public void map(Text key, CrawlDatum value,
-        OutputCollector<FloatWritable,SelectorEntry> output, Reporter reporter)
+        OutputCollector<FloatWritable, SelectorEntry> output, Reporter reporter)
         throws IOException {
       Text url = key;
       if (filter) {
         // If filtering is on don't generate URLs that don't pass
         // URLFilters
         try {
-          if (filters.filter(url.toString()) == null) return;
+          if (filters.filter(url.toString()) == null)
+            return;
         } catch (URLFilterException e) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage() + ")");
+            LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage()
+                + ")");
           }
         }
       }
@@ -190,8 +197,8 @@
           Nutch.WRITABLE_GENERATE_TIME_KEY);
       if (oldGenTime != null) { // awaiting fetch & update
         if (oldGenTime.get() + genDelay > curTime) // still wait for
-        // update
-        return;
+          // update
+          return;
       }
       float sort = 1.0f;
       try {
@@ -203,7 +210,8 @@
       }
 
       // consider only entries with a score superior to the threshold
-      if (scoreThreshold != Float.NaN && sort < scoreThreshold) return;
+      if (scoreThreshold != Float.NaN && sort < scoreThreshold)
+        return;
 
       // sort by decreasing score, using DecreasingFloatComparator
       sortValue.set(sort);
@@ -215,13 +223,15 @@
     }
 
     /** Partition by host / domain or IP. */
-    public int getPartition(FloatWritable key, Writable value, int numReduceTasks) {
-      return partitioner.getPartition(((SelectorEntry) value).url, key, numReduceTasks);
+    public int getPartition(FloatWritable key, Writable value,
+        int numReduceTasks) {
+      return partitioner.getPartition(((SelectorEntry) value).url, key,
+          numReduceTasks);
     }
 
     /** Collect until limit is reached. */
     public void reduce(FloatWritable key, Iterator<SelectorEntry> values,
-        OutputCollector<FloatWritable,SelectorEntry> output, Reporter reporter)
+        OutputCollector<FloatWritable, SelectorEntry> output, Reporter reporter)
         throws IOException {
 
       while (values.hasNext()) {
@@ -231,7 +241,8 @@
           if (currentsegmentnum < maxNumSegments) {
             count = 0;
             currentsegmentnum++;
-          } else break;
+          } else
+            break;
         }
 
         SelectorEntry entry = values.next();
@@ -264,7 +275,7 @@
         if (maxCount > 0) {
           int[] hostCount = hostCounts.get(hostordomain);
           if (hostCount == null) {
-            hostCount = new int[] {1, 0};
+            hostCount = new int[] { 1, 0 };
             hostCounts.put(hostordomain, hostCount);
           }
 
@@ -272,7 +283,8 @@
           hostCount[1]++;
 
           // check if topN reached, select next segment if it is
-          while (segCounts[hostCount[0]-1] >= limit && hostCount[0] < maxNumSegments) {
+          while (segCounts[hostCount[0] - 1] >= limit
+              && hostCount[0] < maxNumSegments) {
             hostCount[0]++;
             hostCount[1] = 0;
           }
@@ -285,18 +297,23 @@
               hostCount[1] = 0;
             } else {
               if (hostCount[1] == maxCount + 1 && LOG.isInfoEnabled()) {
-                LOG.info("Host or domain " + hostordomain + " has more than " + maxCount
-                    + " URLs for all " + maxNumSegments + " segments. Additional URLs won't be included in the fetchlist.");
+                LOG.info("Host or domain "
+                    + hostordomain
+                    + " has more than "
+                    + maxCount
+                    + " URLs for all "
+                    + maxNumSegments
+                    + " segments. Additional URLs won't be included in the fetchlist.");
               }
               // skip this entry
               continue;
             }
           }
           entry.segnum = new IntWritable(hostCount[0]);
-          segCounts[hostCount[0]-1]++;
+          segCounts[hostCount[0] - 1]++;
         } else {
           entry.segnum = new IntWritable(currentsegmentnum);
-          segCounts[currentsegmentnum-1]++;
+          segCounts[currentsegmentnum - 1]++;
         }
 
         output.collect(key, entry);
@@ -310,16 +327,17 @@
 
   // Allows the reducers to generate one subfile per
   public static class GeneratorOutputFormat extends
-      MultipleSequenceFileOutputFormat<FloatWritable,SelectorEntry> {
+      MultipleSequenceFileOutputFormat<FloatWritable, SelectorEntry> {
     // generate a filename based on the segnum stored for this entry
-    protected String generateFileNameForKeyValue(FloatWritable key, SelectorEntry value,
-        String name) {
+    protected String generateFileNameForKeyValue(FloatWritable key,
+        SelectorEntry value, String name) {
       return "fetchlist-" + value.segnum.toString() + "/" + name;
     }
 
   }
 
-  public static class DecreasingFloatComparator extends FloatWritable.Comparator {
+  public static class DecreasingFloatComparator extends
+      FloatWritable.Comparator {
 
     /** Compares two FloatWritables decreasing. */
     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
@@ -328,20 +346,22 @@
   }
 
   public static class SelectorInverseMapper extends MapReduceBase implements
-      Mapper<FloatWritable,SelectorEntry,Text,SelectorEntry> {
+      Mapper<FloatWritable, SelectorEntry, Text, SelectorEntry> {
 
     public void map(FloatWritable key, SelectorEntry value,
-        OutputCollector<Text,SelectorEntry> output, Reporter reporter) throws IOException {
+        OutputCollector<Text, SelectorEntry> output, Reporter reporter)
+        throws IOException {
       SelectorEntry entry = (SelectorEntry) value;
       output.collect(entry.url, entry);
     }
   }
 
   public static class PartitionReducer extends MapReduceBase implements
-      Reducer<Text,SelectorEntry,Text,CrawlDatum> {
+      Reducer<Text, SelectorEntry, Text, CrawlDatum> {
 
     public void reduce(Text key, Iterator<SelectorEntry> values,
-        OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       // if using HashComparator, we get only one input key in case of
       // hash collision
       // so use only URLs from values
@@ -388,15 +408,17 @@
    * Update the CrawlDB so that the next generate won't include the same URLs.
    */
   public static class CrawlDbUpdater extends MapReduceBase implements
-      Mapper<Text,CrawlDatum,Text,CrawlDatum>, Reducer<Text,CrawlDatum,Text,CrawlDatum> {
+      Mapper<Text, CrawlDatum, Text, CrawlDatum>,
+      Reducer<Text, CrawlDatum, Text, CrawlDatum> {
     long generateTime;
 
     public void configure(JobConf job) {
       generateTime = job.getLong(Nutch.GENERATE_TIME_KEY, 0L);
     }
 
-    public void map(Text key, CrawlDatum value, OutputCollector<Text,CrawlDatum> output,
-        Reporter reporter) throws IOException {
+    public void map(Text key, CrawlDatum value,
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       output.collect(key, value);
     }
 
@@ -404,7 +426,8 @@
     private LongWritable genTime = new LongWritable(0L);
 
     public void reduce(Text key, Iterator<CrawlDatum> values,
-        OutputCollector<Text,CrawlDatum> output, Reporter reporter) throws IOException {
+        OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+        throws IOException {
       genTime.set(0L);
       while (values.hasNext()) {
         CrawlDatum val = values.next();
@@ -428,19 +451,21 @@
     }
   }
 
-  public Generator() {}
+  public Generator() {
+  }
 
   public Generator(Configuration conf) {
     setConf(conf);
   }
 
-  public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime)
-      throws IOException {
+  public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
+      long curTime) throws IOException {
 
     JobConf job = new NutchJob(getConf());
     boolean filter = job.getBoolean(GENERATOR_FILTER, true);
     boolean normalise = job.getBoolean(GENERATOR_NORMALISE, true);
-    return generate(dbDir, segments, numLists, topN, curTime, filter, normalise, false, 1);
+    return generate(dbDir, segments, numLists, topN, curTime, filter,
+        normalise, false, 1);
   }
 
   /**
@@ -449,7 +474,8 @@
    **/
   public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
       long curTime, boolean filter, boolean force) throws IOException {
-    return generate(dbDir, segments, numLists, topN, curTime, filter, true, force, 1);
+    return generate(dbDir, segments, numLists, topN, curTime, filter, true,
+        force, 1);
   }
 
   /**
@@ -475,11 +501,11 @@
    *           When an I/O error occurs
    */
   public Path[] generate(Path dbDir, Path segments, int numLists, long topN,
-      long curTime, boolean filter, boolean norm, boolean force, int maxNumSegments)
-      throws IOException {
+      long curTime, boolean filter, boolean norm, boolean force,
+      int maxNumSegments) throws IOException {
 
-    Path tempDir = new Path(getConf().get("mapred.temp.dir", ".") + "/generate-temp-"
-        + System.currentTimeMillis());
+    Path tempDir = new Path(getConf().get("mapred.temp.dir", ".")
+        + "/generate-temp-" + System.currentTimeMillis());
 
     Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
     FileSystem fs = FileSystem.get(getConf());
@@ -494,8 +520,8 @@
     if (topN != Long.MAX_VALUE) {
       LOG.info("Generator: topN: " + topN);
     }
-    
-    if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))){
+
+    if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))) {
       LOG.info("Generator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead");
     }
 
@@ -548,7 +574,8 @@
     try {
       for (FileStatus stat : status) {
         Path subfetchlist = stat.getPath();
-        if (!subfetchlist.getName().startsWith("fetchlist-")) continue;
+        if (!subfetchlist.getName().startsWith("fetchlist-"))
+          continue;
         // start a new partition job for this segment
         Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);
         generatedSegments.add(newSeg);
@@ -568,8 +595,8 @@
 
     if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
       // update the db from tempDir
-      Path tempDir2 = new Path(getConf().get("mapred.temp.dir", ".") + "/generate-temp-"
-          + System.currentTimeMillis());
+      Path tempDir2 = new Path(getConf().get("mapred.temp.dir", ".")
+          + "/generate-temp-" + System.currentTimeMillis());
 
       job = new NutchJob(getConf());
       job.setJobName("generate: updatedb " + dbDir);
@@ -602,7 +629,8 @@
     fs.delete(tempDir, true);
 
     long end = System.currentTimeMillis();
-    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
 
     Path[] patharray = new Path[generatedSegments.size()];
     return generatedSegments.toArray(patharray);
@@ -648,7 +676,8 @@
   public static synchronized String generateSegmentName() {
     try {
       Thread.sleep(1000);
-    } catch (Throwable t) {}
+    } catch (Throwable t) {
+    }
     ;
     return sdf.format(new Date(System.currentTimeMillis()));
   }
@@ -657,7 +686,8 @@
    * Generate a fetchlist from the crawldb.
    */
   public static void main(String args[]) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new Generator(), args);
+    int res = ToolRunner
+        .run(NutchConfiguration.create(), new Generator(), args);
     System.exit(res);
   }
 
@@ -701,9 +731,10 @@
     }
 
     try {
-      Path[] segs = generate(dbDir, segmentsDir, numFetchers, topN, curTime, filter,
-          norm, force, maxNumSegments);
-      if (segs == null) return -1;
+      Path[] segs = generate(dbDir, segmentsDir, numFetchers, topN, curTime,
+          filter, norm, force, maxNumSegments);
+      if (segs == null)
+        return -1;
     } catch (Exception e) {
       LOG.error("Generator: " + StringUtils.stringifyException(e));
       return -1;
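
The Selector.reduce() hunks above implement the per-host (or per-domain) quota
bookkeeping mentioned in the class Javadoc: each host keeps a two-element
counter {current segment, URLs already assigned to it in that segment}; when
the quota (maxCount) is exceeded the URL overflows into the next segment, and
once all maxNumSegments segments are exhausted the entry is skipped. The
following simplified, self-contained sketch (illustrative names, not Nutch
code; the per-segment topN check of the real reducer is omitted) shows that
idea in isolation:

    import java.util.HashMap;
    import java.util.Map;

    /** Sketch only: assigns a host's URLs to segments, at most maxCount per segment. */
    public class HostSegmentAssigner {
      private final Map<String, int[]> hostCounts = new HashMap<String, int[]>();
      private final int maxCount;
      private final int maxNumSegments;

      public HostSegmentAssigner(int maxCount, int maxNumSegments) {
        this.maxCount = maxCount;
        this.maxNumSegments = maxNumSegments;
      }

      /** @return 1-based segment number, or -1 if the host is over quota everywhere. */
      public int assign(String host) {
        int[] hostCount = hostCounts.get(host);
        if (hostCount == null) {
          hostCount = new int[] { 1, 0 }; // {segment, count in that segment}
          hostCounts.put(host, hostCount);
        }
        hostCount[1]++;
        if (hostCount[1] > maxCount) {    // quota reached in the current segment
          if (hostCount[0] < maxNumSegments) {
            hostCount[0]++;               // spill over into the next segment
            hostCount[1] = 1;
          } else {
            return -1;                    // all segments full for this host: skip
          }
        }
        return hostCount[0];
      }

      public static void main(String[] args) {
        HostSegmentAssigner a = new HostSegmentAssigner(2, 2);
        for (int i = 0; i < 6; i++) {
          System.out.println("example.com -> segment " + a.assign("example.com"));
        }
      }
    }

With maxCount = 2 and maxNumSegments = 2 the example prints segments 1, 1, 2, 2
and then -1 for the remaining URLs, which is the situation the log message
"Additional URLs won't be included in the fetchlist" refers to.
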
Index: src/java/org/apache/nutch/crawl/LinkDbReader.java
===================================================================
--- src/java/org/apache/nutch/crawl/LinkDbReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/LinkDbReader.java	(working copy)
@@ -49,14 +49,14 @@
   private MapFile.Reader[] readers;
 
   public LinkDbReader() {
-    
+
   }
-  
+
   public LinkDbReader(Configuration conf, Path directory) throws Exception {
     setConf(conf);
     init(directory);
   }
-  
+
   public void init(Path directory) throws Exception {
     this.fs = FileSystem.get(getConf());
     this.directory = directory;
@@ -72,16 +72,16 @@
   public Inlinks getInlinks(Text url) throws IOException {
 
     if (readers == null) {
-      synchronized(this) {
-        readers = MapFileOutputFormat.getReaders
-          (fs, new Path(directory, LinkDb.CURRENT_NAME), getConf());
+      synchronized (this) {
+        readers = MapFileOutputFormat.getReaders(fs, new Path(directory,
+            LinkDb.CURRENT_NAME), getConf());
       }
     }
-    
-    return (Inlinks)MapFileOutputFormat.getEntry
-      (readers, PARTITIONER, url, new Inlinks());
+
+    return (Inlinks) MapFileOutputFormat.getEntry(readers, PARTITIONER, url,
+        new Inlinks());
   }
-  
+
   public void close() throws IOException {
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -89,7 +89,7 @@
       }
     }
   }
-  
+
   public void processDumpJob(String linkdb, String output) throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -113,19 +113,24 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("LinkDb dump: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDb dump: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
-  
+
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbReader(), args);
+    int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbReader(),
+        args);
     System.exit(res);
   }
-  
+
   public int run(String[] args) throws Exception {
     if (args.length < 2) {
-      System.err.println("Usage: LinkDbReader <linkdb> (-dump <out_dir> | -url <url>)");
-      System.err.println("\t-dump <out_dir>\tdump whole link db to a text file in <out_dir>");
-      System.err.println("\t-url <url>\tprint information about <url> to System.out");
+      System.err
+          .println("Usage: LinkDbReader <linkdb> (-dump <out_dir> | -url <url>)");
+      System.err
+          .println("\t-dump <out_dir>\tdump whole link db to a text file in <out_dir>");
+      System.err
+          .println("\t-url <url>\tprint information about <url> to System.out");
       return -1;
     }
     try {
Index: src/java/org/apache/nutch/crawl/TextProfileSignature.java
===================================================================
--- src/java/org/apache/nutch/crawl/TextProfileSignature.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/TextProfileSignature.java	(working copy)
@@ -35,41 +35,50 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * <p>An implementation of a page signature. It calculates an MD5 hash
- * of a plain text "profile" of a page. In case there is no text, it
- * calculates a hash using the {@link MD5Signature}.</p>
- * <p>The algorithm to calculate a page "profile" takes the plain text version of
- * a page and performs the following steps:
+ * <p>
+ * An implementation of a page signature. It calculates an MD5 hash of a plain
+ * text "profile" of a page. In case there is no text, it calculates a hash
+ * using the {@link MD5Signature}.
+ * </p>
+ * <p>
+ * The algorithm to calculate a page "profile" takes the plain text version of a
+ * page and performs the following steps:
  * <ul>
  * <li>remove all characters except letters and digits, and bring all characters
  * to lower case,</li>
  * <li>split the text into tokens (all consecutive non-whitespace characters),</li>
- * <li>discard tokens equal or shorter than MIN_TOKEN_LEN (default 2 characters),</li>
+ * <li>discard tokens equal to or shorter than MIN_TOKEN_LEN (default 2
+ * characters),</li>
  * <li>sort the list of tokens by decreasing frequency,</li>
- * <li>round down the counts of tokens to the nearest multiple of QUANT
- * (<code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is 0.01f
- * by default, and <code>maxFreq</code> is the maximum token frequency). If
- * <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2 (which
- * means that tokens with frequency 1 are always discarded).</li>
- * <li>tokens, which frequency after quantization falls below QUANT, are discarded.</li>
- * <li>create a list of tokens and their quantized frequency, separated by spaces,
- * in the order of decreasing frequency.</li>
+ * <li>round down the counts of tokens to the nearest multiple of QUANT (
+ * <code>QUANT = QUANT_RATE * maxFreq</code>, where <code>QUANT_RATE</code> is
+ * 0.01f by default, and <code>maxFreq</code> is the maximum token frequency).
+ * If <code>maxFreq</code> is higher than 1, then QUANT is always higher than 2
+ * (which means that tokens with frequency 1 are always discarded).</li>
+ * <li>tokens whose frequency after quantization falls below QUANT are
+ * discarded.</li>
+ * <li>create a list of tokens and their quantized frequency, separated by
+ * spaces, in the order of decreasing frequency.</li>
  * </ul>
  * This list is then submitted to an MD5 hash calculation.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class TextProfileSignature extends Signature {
-  
+
   Signature fallback = new MD5Signature();
 
   public byte[] calculate(Content content, Parse parse) {
-    int MIN_TOKEN_LEN = getConf().getInt("db.signature.text_profile.min_token_len", 2);
-    float QUANT_RATE = getConf().getFloat("db.signature.text_profile.quant_rate", 0.01f);
+    int MIN_TOKEN_LEN = getConf().getInt(
+        "db.signature.text_profile.min_token_len", 2);
+    float QUANT_RATE = getConf().getFloat(
+        "db.signature.text_profile.quant_rate", 0.01f);
     HashMap<String, Token> tokens = new HashMap<String, Token>();
     String text = null;
-    if (parse != null) text = parse.getText();
-    if (text == null || text.length() == 0) return fallback.calculate(content, parse);
+    if (parse != null)
+      text = parse.getText();
+    if (text == null || text.length() == 0)
+      return fallback.calculate(content, parse);
     StringBuffer curToken = new StringBuffer();
     int maxFreq = 0;
     for (int i = 0; i < text.length(); i++) {
@@ -87,7 +96,8 @@
               tokens.put(s, tok);
             }
             tok.cnt++;
-            if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+            if (tok.cnt > maxFreq)
+              maxFreq = tok.cnt;
           }
           curToken.setLength(0);
         }
@@ -103,17 +113,20 @@
         tokens.put(s, tok);
       }
       tok.cnt++;
-      if (tok.cnt > maxFreq) maxFreq = tok.cnt;
+      if (tok.cnt > maxFreq)
+        maxFreq = tok.cnt;
     }
     Iterator<Token> it = tokens.values().iterator();
     ArrayList<Token> profile = new ArrayList<Token>();
     // calculate the QUANT value
     int QUANT = Math.round(maxFreq * QUANT_RATE);
     if (QUANT < 2) {
-      if (maxFreq > 1) QUANT = 2;
-      else QUANT = 1;
+      if (maxFreq > 1)
+        QUANT = 2;
+      else
+        QUANT = 1;
     }
-    while(it.hasNext()) {
+    while (it.hasNext()) {
       Token t = it.next();
       // round down to the nearest QUANT
       t.cnt = (t.cnt / QUANT) * QUANT;
@@ -128,32 +141,33 @@
     it = profile.iterator();
     while (it.hasNext()) {
       Token t = it.next();
-      if (newText.length() > 0) newText.append("\n");
+      if (newText.length() > 0)
+        newText.append("\n");
       newText.append(t.toString());
     }
     return MD5Hash.digest(newText.toString()).getDigest();
   }
-  
+
   private static class Token {
     public int cnt;
     public String val;
-    
+
     public Token(int cnt, String val) {
       this.cnt = cnt;
       this.val = val;
     }
-    
+
     public String toString() {
       return val + " " + cnt;
     }
   }
-  
+
   private static class TokenComparator implements Comparator<Token> {
     public int compare(Token t1, Token t2) {
       return t2.cnt - t1.cnt;
     }
   }
-  
+
   public static void main(String[] args) throws Exception {
     TextProfileSignature sig = new TextProfileSignature();
     sig.setConf(NutchConfiguration.create());
@@ -161,15 +175,18 @@
     File[] files = new File(args[0]).listFiles();
     for (int i = 0; i < files.length; i++) {
       FileInputStream fis = new FileInputStream(files[i]);
-      BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
+      BufferedReader br = new BufferedReader(
+          new InputStreamReader(fis, "UTF-8"));
       StringBuffer text = new StringBuffer();
       String line = null;
       while ((line = br.readLine()) != null) {
-        if (text.length() > 0) text.append("\n");
+        if (text.length() > 0)
+          text.append("\n");
         text.append(line);
       }
       br.close();
-      byte[] signature = sig.calculate(null, new ParseImpl(text.toString(), null));
+      byte[] signature = sig.calculate(null, new ParseImpl(text.toString(),
+          null));
       res.put(files[i].toString(), signature);
     }
     Iterator<String> it = res.keySet().iterator();
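
The Javadoc above spells out the profile algorithm step by step. The following
self-contained sketch (illustrative names, not the Nutch class) reproduces
those steps up to the point where the profile string is built; Nutch then
hashes that string with MD5Hash.digest():

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    /** Sketch of the text-profile construction; the MD5 hashing step is omitted. */
    public class TextProfileSketch {

      public static String profile(String text, int minTokenLen, float quantRate) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        int maxFreq = 0;
        // keep only letters and digits, lower-cased, and split into tokens
        String cleaned = text.toLowerCase().replaceAll("[^\\p{L}\\p{N}]+", " ");
        for (String token : cleaned.split("\\s+")) {
          if (token.length() <= minTokenLen)
            continue; // discard short tokens
          Integer c = counts.get(token);
          int n = (c == null) ? 1 : c + 1;
          counts.put(token, n);
          if (n > maxFreq)
            maxFreq = n;
        }
        // QUANT = QUANT_RATE * maxFreq, but at least 2 when maxFreq > 1
        int quant = Math.round(maxFreq * quantRate);
        if (quant < 2)
          quant = (maxFreq > 1) ? 2 : 1;
        List<Map.Entry<String, Integer>> profile =
            new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> e : counts.entrySet()) {
          int quantized = (e.getValue() / quant) * quant; // round down to QUANT
          if (quantized < quant)
            continue; // drop tokens that fall below QUANT
          e.setValue(quantized);
          profile.add(e);
        }
        Collections.sort(profile, new Comparator<Map.Entry<String, Integer>>() {
          public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
            return b.getValue() - a.getValue(); // decreasing frequency
          }
        });
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, Integer> e : profile) {
          if (sb.length() > 0)
            sb.append('\n');
          sb.append(e.getKey()).append(' ').append(e.getValue());
        }
        return sb.toString(); // this string is what the MD5 hash is taken over
      }

      public static void main(String[] args) {
        System.out.println(profile(
            "the quick brown fox jumps over the lazy dog the fox", 2, 0.01f));
      }
    }
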
Index: src/java/org/apache/nutch/crawl/CrawlDbReducer.java
===================================================================
--- src/java/org/apache/nutch/crawl/CrawlDbReducer.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/CrawlDbReducer.java	(working copy)
@@ -35,9 +35,11 @@
 import org.apache.nutch.scoring.ScoringFilters;
 
 /** Merge new page entries with existing entries. */
-public class CrawlDbReducer implements Reducer<Text, CrawlDatum, Text, CrawlDatum> {
-  public static final Logger LOG = LoggerFactory.getLogger(CrawlDbReducer.class);
-  
+public class CrawlDbReducer implements
+    Reducer<Text, CrawlDatum, Text, CrawlDatum> {
+  public static final Logger LOG = LoggerFactory
+      .getLogger(CrawlDbReducer.class);
+
   private int retryMax;
   private CrawlDatum result = new CrawlDatum();
   private InlinkPriorityQueue linked = null;
@@ -51,18 +53,20 @@
     scfilters = new ScoringFilters(job);
     additionsAllowed = job.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true);
     int oldMaxInterval = job.getInt("db.max.fetch.interval", 0);
-    maxInterval = job.getInt("db.fetch.interval.max", 0 );
-    if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
+    maxInterval = job.getInt("db.fetch.interval.max", 0);
+    if (oldMaxInterval > 0 && maxInterval == 0)
+      maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
     schedule = FetchScheduleFactory.getFetchSchedule(job);
     int maxLinks = job.getInt("db.update.max.inlinks", 10000);
     linked = new InlinkPriorityQueue(maxLinks);
   }
 
-  public void close() {}
+  public void close() {
+  }
 
   public void reduce(Text key, Iterator<CrawlDatum> values,
-                     OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     CrawlDatum fetch = new CrawlDatum();
     CrawlDatum old = new CrawlDatum();
@@ -73,10 +77,11 @@
     boolean multiple = false; // avoid deep copy when only single value exists
     linked.clear();
     org.apache.hadoop.io.MapWritable metaFromParse = null;
-    
+
     while (values.hasNext()) {
-      CrawlDatum datum = (CrawlDatum)values.next();
-      if (!multiple && values.hasNext()) multiple = true;
+      CrawlDatum datum = (CrawlDatum) values.next();
+      if (!multiple && values.hasNext())
+        multiple = true;
       if (CrawlDatum.hasDbStatus(datum)) {
         if (!oldSet) {
           if (multiple) {
@@ -88,7 +93,8 @@
           oldSet = true;
         } else {
           // always take the latest version
-          if (old.getFetchTime() < datum.getFetchTime()) old.set(datum);
+          if (old.getFetchTime() < datum.getFetchTime())
+            old.set(datum);
         }
         continue;
       }
@@ -103,12 +109,13 @@
           fetchSet = true;
         } else {
           // always take the latest version
-          if (fetch.getFetchTime() < datum.getFetchTime()) fetch.set(datum);
+          if (fetch.getFetchTime() < datum.getFetchTime())
+            fetch.set(datum);
         }
         continue;
       }
 
-      switch (datum.getStatus()) {                // collect other info
+      switch (datum.getStatus()) { // collect other info
       case CrawlDatum.STATUS_LINKED:
         CrawlDatum link;
         if (multiple) {
@@ -129,7 +136,7 @@
         LOG.warn("Unknown status, key: " + key + ", datum: " + datum);
       }
     }
-    
+
     // copy the content of the queue into a List
     // in reversed order
     int numLinks = linked.size();
@@ -137,28 +144,31 @@
     for (int i = numLinks - 1; i >= 0; i--) {
       linkList.add(linked.pop());
     }
-    
+
     // if it doesn't already exist, skip it
-    if (!oldSet && !additionsAllowed) return;
-    
+    if (!oldSet && !additionsAllowed)
+      return;
+
     // if there is no fetched datum, perhaps there is a link
     if (!fetchSet && linkList.size() > 0) {
       fetch = linkList.get(0);
       fetchSet = true;
     }
-    
+
     // still no new data - record only unchanged old data, if exists, and return
     if (!fetchSet) {
       if (oldSet) {// at this point at least "old" should be present
         output.collect(key, old);
-        reporter.getCounter("CrawlDB status", CrawlDatum.getStatusName(old.getStatus())).increment(1);
+        reporter.getCounter("CrawlDB status",
+            CrawlDatum.getStatusName(old.getStatus())).increment(1);
       } else {
         LOG.warn("Missing fetch and old value, signature=" + signature);
       }
       return;
     }
-    
-    if (signature == null) signature = fetch.getSignature();
+
+    if (signature == null)
+      signature = fetch.getSignature();
     long prevModifiedTime = oldSet ? old.getModifiedTime() : 0L;
     long prevFetchTime = oldSet ? old.getFetchTime() : 0L;
 
@@ -177,31 +187,31 @@
         result.setModifiedTime(old.getModifiedTime());
       }
     }
-    
-    switch (fetch.getStatus()) {                // determine new status
 
-    case CrawlDatum.STATUS_LINKED:                // it was link
-      if (oldSet) {                          // if old exists
-        result.set(old);                          // use it
+    switch (fetch.getStatus()) { // determine new status
+
+    case CrawlDatum.STATUS_LINKED: // it was link
+      if (oldSet) { // if old exists
+        result.set(old); // use it
       } else {
-        result = schedule.initializeSchedule((Text)key, result);
+        result = schedule.initializeSchedule((Text) key, result);
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         try {
-          scfilters.initialScore((Text)key, result);
+          scfilters.initialScore((Text) key, result);
         } catch (ScoringFilterException e) {
           if (LOG.isWarnEnabled()) {
-            LOG.warn("Cannot filter init score for url " + key +
-                     ", using default: " + e.getMessage());
+            LOG.warn("Cannot filter init score for url " + key
+                + ", using default: " + e.getMessage());
           }
           result.setScore(0.0f);
         }
       }
       break;
-      
-    case CrawlDatum.STATUS_FETCH_SUCCESS:         // succesful fetch
-    case CrawlDatum.STATUS_FETCH_REDIR_TEMP:      // successful fetch, redirected
+
+    case CrawlDatum.STATUS_FETCH_SUCCESS: // successful fetch
+    case CrawlDatum.STATUS_FETCH_REDIR_TEMP: // successful fetch, redirected
     case CrawlDatum.STATUS_FETCH_REDIR_PERM:
-    case CrawlDatum.STATUS_FETCH_NOTMODIFIED:     // successful fetch, notmodified
+    case CrawlDatum.STATUS_FETCH_NOTMODIFIED: // successful fetch, notmodified
       // determine the modification status
       int modified = FetchSchedule.STATUS_UNKNOWN;
       if (fetch.getStatus() == CrawlDatum.STATUS_FETCH_NOTMODIFIED) {
@@ -216,12 +226,14 @@
         }
       }
       // set the schedule
-      result = schedule.setFetchSchedule((Text)key, result, prevFetchTime,
-          prevModifiedTime, fetch.getFetchTime(), fetch.getModifiedTime(), modified);
+      result = schedule.setFetchSchedule((Text) key, result, prevFetchTime,
+          prevModifiedTime, fetch.getFetchTime(), fetch.getModifiedTime(),
+          modified);
       // set the result status and signature
       if (modified == FetchSchedule.STATUS_NOTMODIFIED) {
         result.setStatus(CrawlDatum.STATUS_DB_NOTMODIFIED);
-        if (oldSet) result.setSignature(old.getSignature());
+        if (oldSet)
+          result.setSignature(old.getSignature());
       } else {
         switch (fetch.getStatus()) {
         case CrawlDatum.STATUS_FETCH_SUCCESS:
@@ -234,34 +246,37 @@
           result.setStatus(CrawlDatum.STATUS_DB_REDIR_TEMP);
           break;
         default:
-          LOG.warn("Unexpected status: " + fetch.getStatus() + " resetting to old status.");
-          if (oldSet) result.setStatus(old.getStatus());
-          else result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
+          LOG.warn("Unexpected status: " + fetch.getStatus()
+              + " resetting to old status.");
+          if (oldSet)
+            result.setStatus(old.getStatus());
+          else
+            result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         }
         result.setSignature(signature);
         if (metaFromParse != null) {
-            for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
-              result.getMetaData().put(e.getKey(), e.getValue());
-            }
+          for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
+            result.getMetaData().put(e.getKey(), e.getValue());
           }
+        }
       }
       // if fetchInterval is larger than the system-wide maximum, trigger
       // an unconditional recrawl. This prevents the page to be stuck at
       // NOTMODIFIED state, when the old fetched copy was already removed with
       // old segments.
       if (maxInterval < result.getFetchInterval())
-        result = schedule.forceRefetch((Text)key, result, false);
+        result = schedule.forceRefetch((Text) key, result, false);
       break;
     case CrawlDatum.STATUS_SIGNATURE:
       if (LOG.isWarnEnabled()) {
         LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: " + key);
-      }   
+      }
       return;
-    case CrawlDatum.STATUS_FETCH_RETRY:           // temporary failure
+    case CrawlDatum.STATUS_FETCH_RETRY: // temporary failure
       if (oldSet) {
-        result.setSignature(old.getSignature());  // use old signature
+        result.setSignature(old.getSignature()); // use old signature
       }
-      result = schedule.setPageRetrySchedule((Text)key, result, prevFetchTime,
+      result = schedule.setPageRetrySchedule((Text) key, result, prevFetchTime,
           prevModifiedTime, fetch.getFetchTime());
       if (result.getRetriesSinceFetch() < retryMax) {
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
@@ -270,20 +285,22 @@
       }
       break;
 
-    case CrawlDatum.STATUS_FETCH_GONE:            // permanent failure
+    case CrawlDatum.STATUS_FETCH_GONE: // permanent failure
       if (oldSet)
-        result.setSignature(old.getSignature());  // use old signature
+        result.setSignature(old.getSignature()); // use old signature
       result.setStatus(CrawlDatum.STATUS_DB_GONE);
-      result = schedule.setPageGoneSchedule((Text)key, result, prevFetchTime,
+      result = schedule.setPageGoneSchedule((Text) key, result, prevFetchTime,
           prevModifiedTime, fetch.getFetchTime());
       break;
 
     default:
-      throw new RuntimeException("Unknown status: " + fetch.getStatus() + " " + key);
+      throw new RuntimeException("Unknown status: " + fetch.getStatus() + " "
+          + key);
     }
 
     try {
-      scfilters.updateDbScore((Text)key, oldSet ? old : null, result, linkList);
+      scfilters
+          .updateDbScore((Text) key, oldSet ? old : null, result, linkList);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("Couldn't update score, key=" + key + ": " + e);
@@ -292,22 +309,23 @@
     // remove generation time, if any
     result.getMetaData().remove(Nutch.WRITABLE_GENERATE_TIME_KEY);
     output.collect(key, result);
-    reporter.getCounter("CrawlDB status", CrawlDatum.getStatusName(result.getStatus())).increment(1);
+    reporter.getCounter("CrawlDB status",
+        CrawlDatum.getStatusName(result.getStatus())).increment(1);
   }
-  
+
 }
 
 class InlinkPriorityQueue extends PriorityQueue<CrawlDatum> {
-  
+
   public InlinkPriorityQueue(int maxSize) {
     initialize(maxSize);
   }
-  
+
   /** Determines the ordering of objects in this priority queue. **/
   protected boolean lessThan(Object arg0, Object arg1) {
     CrawlDatum candidate = (CrawlDatum) arg0;
     CrawlDatum least = (CrawlDatum) arg1;
     return candidate.getScore() > least.getScore();
   }
-  
+
 }
Index: src/java/org/apache/nutch/crawl/SignatureComparator.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureComparator.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/SignatureComparator.java	(working copy)
@@ -23,25 +23,34 @@
   public int compare(Object o1, Object o2) {
     return _compare(o1, o2);
   }
-  
+
   public static int _compare(Object o1, Object o2) {
-    if (o1 == null && o2 == null) return 0;
-    if (o1 == null) return -1;
-    if (o2 == null) return 1;
-    if (!(o1 instanceof byte[])) return -1;
-    if (!(o2 instanceof byte[])) return 1;
-    byte[] data1 = (byte[])o1;
-    byte[] data2 = (byte[])o2;
+    if (o1 == null && o2 == null)
+      return 0;
+    if (o1 == null)
+      return -1;
+    if (o2 == null)
+      return 1;
+    if (!(o1 instanceof byte[]))
+      return -1;
+    if (!(o2 instanceof byte[]))
+      return 1;
+    byte[] data1 = (byte[]) o1;
+    byte[] data2 = (byte[]) o2;
     return _compare(data1, 0, data1.length, data2, 0, data2.length);
   }
-  
-  public static int _compare(byte[] data1, int s1, int l1, byte[] data2, int s2, int l2) {
-    if (l2 > l1) return -1;
-    if (l2 < l1) return 1;
+
+  public static int _compare(byte[] data1, int s1, int l1, byte[] data2,
+      int s2, int l2) {
+    if (l2 > l1)
+      return -1;
+    if (l2 < l1)
+      return 1;
     int res = 0;
     for (int i = 0; i < l1; i++) {
       res = (data1[s1 + i] - data2[s2 + i]);
-      if (res != 0) return res;
+      if (res != 0)
+        return res;
     }
     return 0;
   }
Index: src/java/org/apache/nutch/crawl/SignatureFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/SignatureFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/SignatureFactory.java	(working copy)
@@ -27,28 +27,30 @@
 
 /**
  * Factory class, which instantiates a Signature implementation according to the
- * current Configuration configuration. This newly created instance is cached in the
- * Configuration instance, so that it could be later retrieved.
+ * current Configuration. This newly created instance is cached in the
+ * Configuration instance, so that it can later be retrieved.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class SignatureFactory {
-  private static final Logger LOG = LoggerFactory.getLogger(SignatureFactory.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(SignatureFactory.class);
 
-  private SignatureFactory() {}                   // no public ctor
+  private SignatureFactory() {
+  } // no public ctor
 
   /** Return the default Signature implementation. */
   public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    Signature impl = (Signature)objectCache.getObject(clazz);
+    Signature impl = (Signature) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         if (LOG.isInfoEnabled()) {
           LOG.info("Using Signature impl: " + clazz);
         }
         Class implClass = Class.forName(clazz);
-        impl = (Signature)implClass.newInstance();
+        impl = (Signature) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
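
The SignatureFactory Javadoc above describes the pattern this class (and
FetchScheduleFactory further below) relies on: resolve an implementation class
name from the configuration, instantiate it reflectively, and cache the
instance so that later lookups reuse it. A minimal sketch of that pattern,
with a plain HashMap standing in for Nutch's ObjectCache and illustrative
names:

    import java.util.HashMap;
    import java.util.Map;

    /** Sketch of the "instantiate by class name, then cache" pattern. */
    public class CachedFactory {

      private final Map<String, Object> cache = new HashMap<String, Object>();

      public synchronized <T> T get(String className, Class<T> type)
          throws Exception {
        Object impl = cache.get(className);
        if (impl == null) {
          impl = Class.forName(className).newInstance(); // reflective creation
          cache.put(className, impl);                    // reused on later calls
        }
        return type.cast(impl);
      }
    }

A caller would resolve the class name first, roughly as the factory does with
conf.get("db.signature.class", MD5Signature.class.getName()); the real factory
additionally calls setConf() on the freshly created instance before caching it.
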
Index: src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
===================================================================
--- src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java	(working copy)
@@ -30,41 +30,48 @@
  * 
  * @author Andrzej Bialecki
  */
-public abstract class AbstractFetchSchedule extends Configured implements FetchSchedule {
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractFetchSchedule.class);
-  
+public abstract class AbstractFetchSchedule extends Configured implements
+    FetchSchedule {
+  private static final Logger LOG = LoggerFactory
+      .getLogger(AbstractFetchSchedule.class);
+
   protected int defaultInterval;
   protected int maxInterval;
-  
+
   public AbstractFetchSchedule() {
     super(null);
   }
-  
+
   public AbstractFetchSchedule(Configuration conf) {
     super(conf);
   }
-  
+
   public void setConf(Configuration conf) {
     super.setConf(conf);
-    if (conf == null) return;
+    if (conf == null)
+      return;
     int oldDefaultInterval = conf.getInt("db.default.fetch.interval", 0);
     defaultInterval = conf.getInt("db.fetch.interval.default", 0);
-    if (oldDefaultInterval > 0 && defaultInterval == 0) defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
+    if (oldDefaultInterval > 0 && defaultInterval == 0)
+      defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
     int oldMaxInterval = conf.getInt("db.max.fetch.interval", 0);
-    maxInterval = conf.getInt("db.fetch.interval.max", 0 );
-    if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
+    maxInterval = conf.getInt("db.fetch.interval.max", 0);
+    if (oldMaxInterval > 0 && maxInterval == 0)
+      maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
     LOG.info("defaultInterval=" + defaultInterval);
     LOG.info("maxInterval=" + maxInterval);
   }
-  
+
   /**
-   * Initialize fetch schedule related data. Implementations should at least
-   * set the <code>fetchTime</code> and <code>fetchInterval</code>. The default
-   * implementation sets the <code>fetchTime</code> to now, using the
-   * default <code>fetchInterval</code>.
+   * Initialize fetch schedule related data. Implementations should at least set
+   * the <code>fetchTime</code> and <code>fetchInterval</code>. The default
+   * implementation sets the <code>fetchTime</code> to now, using the default
+   * <code>fetchInterval</code>.
    * 
-   * @param url URL of the page.
-   * @param datum datum instance to be initialized (modified in place).
+   * @param url
+   *          URL of the page.
+   * @param datum
+   *          datum instance to be initialized (modified in place).
    */
   public CrawlDatum initializeSchedule(Text url, CrawlDatum datum) {
     datum.setFetchTime(System.currentTimeMillis());
@@ -72,91 +79,113 @@
     datum.setRetriesSinceFetch(0);
     return datum;
   }
-  
+
   /**
    * Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a
-   * successfully fetched page. NOTE: this implementation resets the
-   * retry counter - extending classes should call super.setFetchSchedule() to
+   * successfully fetched page. NOTE: this implementation resets the retry
+   * counter - extending classes should call super.setFetchSchedule() to
    * preserve this behavior.
    */
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime,
-          long fetchTime, long modifiedTime, int state) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime,
+      long modifiedTime, int state) {
     datum.setRetriesSinceFetch(0);
     return datum;
   }
-  
+
   /**
-   * This method specifies how to schedule refetching of pages
-   * marked as GONE. Default implementation increases fetchInterval by 50%,
-   * and if it exceeds the <code>maxInterval</code> it calls
+   * This method specifies how to schedule refetching of pages marked as GONE.
+   * Default implementation increases fetchInterval by 50%, and if it exceeds
+   * the <code>maxInterval</code> it calls
    * {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
-   * @param datum datum instance to be adjusted
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance to be adjusted
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than <code>datum</code>,
+   *         but implementations should make sure that it contains at least
+   *         all information from <code>datum</code>.
    */
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
     // no page is truly GONE ... just increase the interval by 50%
     // and try much later.
     datum.setFetchInterval(datum.getFetchInterval() * 1.5f);
-    datum.setFetchTime(fetchTime + (long)datum.getFetchInterval() * 1000);
-    if (maxInterval < datum.getFetchInterval()) forceRefetch(url, datum, false);
+    datum.setFetchTime(fetchTime + (long) datum.getFetchInterval() * 1000);
+    if (maxInterval < datum.getFetchInterval())
+      forceRefetch(url, datum, false);
     return datum;
   }
-  
+
   /**
-   * This method adjusts the fetch schedule if fetching needs to be
-   * re-tried due to transient errors. The default implementation
-   * sets the next fetch time 1 day in the future and increases
-   * the retry counter.
-   * @param url URL of the page
-   * @param datum page information
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   * This method adjusts the fetch schedule if fetching needs to be re-tried due
+   * to transient errors. The default implementation sets the next fetch time 1
+   * day in the future and increases the retry counter.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          page information
+   * @param prevFetchTime
+   *          previous fetch time
+   * @param prevModifiedTime
+   *          previous modified time
+   * @param fetchTime
+   *          current fetch time
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
-   * implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   *         NOTE: this may be a different instance than <code>datum</code>,
+   *         but implementations should make sure that it contains at least
+   *         all information from <code>datum</code>.
    */
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
-          long prevFetchTime, long prevModifiedTime, long fetchTime) {
-    datum.setFetchTime(fetchTime + (long)SECONDS_PER_DAY*1000);
+      long prevFetchTime, long prevModifiedTime, long fetchTime) {
+    datum.setFetchTime(fetchTime + (long) SECONDS_PER_DAY * 1000);
     datum.setRetriesSinceFetch(datum.getRetriesSinceFetch() + 1);
     return datum;
   }
-  
+
   /**
    * This method return the last fetch time of the CrawlDatum
+   * 
    * @return the date as a long.
    */
   public long calculateLastFetchTime(CrawlDatum datum) {
-    return  datum.getFetchTime() - (long)datum.getFetchInterval() * 1000;
+    return datum.getFetchTime() - (long) datum.getFetchInterval() * 1000;
   }
 
   /**
-   * This method provides information whether the page is suitable for
-   * selection in the current fetchlist. NOTE: a true return value does not
-   * guarantee that the page will be fetched, it just allows it to be
-   * included in the further selection process based on scores. The default
-   * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
-   * check that fetchTime is not too remote (more than <code>maxInterval</code>,
-   * in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param curTime reference time (usually set to the time when the
-   * fetchlist generation process was started).
+   * This method provides information on whether the page is suitable for
+   * selection in the current fetchlist. NOTE: a true return value does not
+   * guarantee that the page will be fetched, it just allows it to be included
+   * in the further selection process based on scores. The default
+   * implementation checks <code>fetchTime</code>: if it is higher than
+   * <code>curTime</code> it returns false, and true otherwise. It will also
+   * check that fetchTime is not too remote (more than
+   * <code>maxInterval</code>), in which case it lowers the interval and
+   * returns true.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param curTime
+   *          reference time (usually set to the time when the fetchlist
+   *          generation process was started).
    * @return true, if the page should be considered for inclusion in the current
-   * fetchlist, otherwise false.
+   *         fetchlist, otherwise false.
    */
   public boolean shouldFetch(Text url, CrawlDatum datum, long curTime) {
     // pages are never truly GONE - we have to check them from time to time.
-    // pages with too long fetchInterval are adjusted so that they fit within
+    // pages with too long fetchInterval are adjusted so that they fit
+    // within
     // maximum fetchInterval (segment retention period).
     if (datum.getFetchTime() - curTime > (long) maxInterval * 1000) {
       if (datum.getFetchInterval() > maxInterval) {
@@ -165,21 +194,25 @@
       datum.setFetchTime(curTime);
     }
     if (datum.getFetchTime() > curTime) {
-      return false;                                   // not time yet
+      return false; // not time yet
     }
     return true;
   }
-  
+
   /**
    * This method resets fetchTime, fetchInterval, modifiedTime,
    * retriesSinceFetch and page signature, so that it forces refetching.
-   * @param url URL of the page
-   * @param datum datum instance
-   * @param asap if true, force refetch as soon as possible - this sets
-   * the fetchTime to now. If false, force refetch whenever the next fetch
-   * time is set.
+   * 
+   * @param url
+   *          URL of the page
+   * @param datum
+   *          datum instance
+   * @param asap
+   *          if true, force refetch as soon as possible - this sets the
+   *          fetchTime to now. If false, force refetch whenever the next fetch
+   *          time is set.
    */
-  public CrawlDatum  forceRefetch(Text url, CrawlDatum datum, boolean asap) {
+  public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap) {
     // reduce fetchInterval so that it fits within the max value
     if (datum.getFetchInterval() > maxInterval)
       datum.setFetchInterval(maxInterval * 0.9f);
@@ -187,7 +220,8 @@
     datum.setRetriesSinceFetch(0);
     datum.setSignature(null);
     datum.setModifiedTime(0L);
-    if (asap) datum.setFetchTime(System.currentTimeMillis());
+    if (asap)
+      datum.setFetchTime(System.currentTimeMillis());
     return datum;
   }
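
The method Javadocs above describe the default scheduling arithmetic: GONE
pages have their fetchInterval increased by 50% (with a forced refetch once it
exceeds maxInterval), transient failures are retried one day later with the
retry counter incremented, and shouldFetch() first pulls an overly remote
fetchTime back before comparing it against curTime. A condensed sketch of that
arithmetic (illustrative names; not the Nutch class, whose forceRefetch() also
resets the signature and modified time):

    /** Sketch of the default fetch-schedule arithmetic. */
    public class FetchScheduleSketch {
      static final long SECONDS_PER_DAY = 86400L;

      long fetchTime;                          // next scheduled fetch, epoch millis
      float fetchInterval;                     // seconds between fetches
      int retries;
      long maxInterval = 90 * SECONDS_PER_DAY; // e.g. value of db.fetch.interval.max

      /** Page reported GONE: back off by 50%, capped at maxInterval. */
      void pageGone(long now) {
        fetchInterval = fetchInterval * 1.5f;
        fetchTime = now + (long) fetchInterval * 1000;
        if (fetchInterval > maxInterval) {
          fetchInterval = maxInterval * 0.9f;  // what forceRefetch() would do
        }
      }

      /** Transient failure: retry in one day and count the attempt. */
      void pageRetry(long now) {
        fetchTime = now + SECONDS_PER_DAY * 1000;
        retries++;
      }

      /** Due for fetching? Overly remote fetch times are pulled back first. */
      boolean shouldFetch(long curTime) {
        if (fetchTime - curTime > maxInterval * 1000) {
          if (fetchInterval > maxInterval) {
            fetchInterval = maxInterval * 0.9f;
          }
          fetchTime = curTime;
        }
        return fetchTime <= curTime;
      }
    }
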
 
Index: src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
===================================================================
--- src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/crawl/FetchScheduleFactory.java	(working copy)
@@ -25,20 +25,23 @@
 /** Creates and caches a {@link FetchSchedule} implementation. */
 public class FetchScheduleFactory {
 
-  public static final Logger LOG = LoggerFactory.getLogger(FetchScheduleFactory.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FetchScheduleFactory.class);
 
-  private FetchScheduleFactory() {}                   // no public ctor
+  private FetchScheduleFactory() {
+  } // no public ctor
 
   /** Return the FetchSchedule implementation. */
   public static FetchSchedule getFetchSchedule(Configuration conf) {
-    String clazz = conf.get("db.fetch.schedule.class", DefaultFetchSchedule.class.getName());
+    String clazz = conf.get("db.fetch.schedule.class",
+        DefaultFetchSchedule.class.getName());
     ObjectCache objectCache = ObjectCache.get(conf);
-    FetchSchedule impl = (FetchSchedule)objectCache.getObject(clazz);
+    FetchSchedule impl = (FetchSchedule) objectCache.getObject(clazz);
     if (impl == null) {
       try {
         LOG.info("Using FetchSchedule impl: " + clazz);
         Class implClass = Class.forName(clazz);
-        impl = (FetchSchedule)implClass.newInstance();
+        impl = (FetchSchedule) implClass.newInstance();
         impl.setConf(conf);
         objectCache.setObject(clazz, impl);
       } catch (Exception e) {
Index: src/java/org/apache/nutch/parse/ParsePluginsReader.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginsReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParsePluginsReader.java	(working copy)
@@ -42,50 +42,50 @@
 // Nutch imports
 import org.apache.nutch.util.NutchConfiguration;
 
-
 /**
  * A reader to load the information stored in the
  * <code>$NUTCH_HOME/conf/parse-plugins.xml</code> file.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 class ParsePluginsReader {
-  
+
   /* our log stream */
-  public static final Logger LOG = LoggerFactory.getLogger(ParsePluginsReader.class);
-  
+  public static final Logger LOG = LoggerFactory
+      .getLogger(ParsePluginsReader.class);
+
   /** The property name of the parse-plugins location */
   private static final String PP_FILE_PROP = "parse.plugin.file";
 
   /** the parse-plugins file */
   private String fParsePluginsFile = null;
 
-  
   /**
    * Constructs a new ParsePluginsReader
    */
-  public ParsePluginsReader() { }
-  
+  public ParsePluginsReader() {
+  }
+
   /**
    * Reads the <code>parse-plugins.xml</code> file and returns the
    * {@link #ParsePluginList} defined by it.
-   *
+   * 
    * @return A {@link #ParsePluginList} specified by the
    *         <code>parse-plugins.xml</code> file.
    * @throws Exception
-   *             If any parsing error occurs.
+   *           If any parsing error occurs.
    */
   public ParsePluginList parse(Configuration conf) {
-    
+
     ParsePluginList pList = new ParsePluginList();
-    
+
     // open up the XML file
     DocumentBuilderFactory factory = null;
     DocumentBuilder parser = null;
     Document document = null;
     InputSource inputSource = null;
-    
+
     InputStream ppInputStream = null;
     if (fParsePluginsFile != null) {
       URL parsePluginUrl = null;
@@ -94,56 +94,55 @@
         ppInputStream = parsePluginUrl.openStream();
       } catch (Exception e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Unable to load parse plugins file from URL " +
-                   "[" + fParsePluginsFile + "]. Reason is [" + e + "]");
+          LOG.warn("Unable to load parse plugins file from URL " + "["
+              + fParsePluginsFile + "]. Reason is [" + e + "]");
         }
         return pList;
       }
     } else {
-      ppInputStream = conf.getConfResourceAsInputStream(
-                          conf.get(PP_FILE_PROP));
+      ppInputStream = conf.getConfResourceAsInputStream(conf.get(PP_FILE_PROP));
     }
-    
+
     inputSource = new InputSource(ppInputStream);
-    
+
     try {
       factory = DocumentBuilderFactory.newInstance();
       parser = factory.newDocumentBuilder();
       document = parser.parse(inputSource);
     } catch (Exception e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." +
-                 "Reason is [" + e + "]");
+        LOG.warn("Unable to parse [" + fParsePluginsFile + "]." + "Reason is ["
+            + e + "]");
       }
       return null;
     }
-    
+
     Element parsePlugins = document.getDocumentElement();
-    
+
     // build up the alias hash map
     Map<String, String> aliases = getAliases(parsePlugins);
     // And store it on the parse plugin list
     pList.setAliases(aliases);
-     
+
     // get all the mime type nodes
     NodeList mimeTypes = parsePlugins.getElementsByTagName("mimeType");
-    
+
     // iterate through the mime types
     for (int i = 0; i < mimeTypes.getLength(); i++) {
       Element mimeType = (Element) mimeTypes.item(i);
       String mimeTypeStr = mimeType.getAttribute("name");
-      
+
       // for each mimeType, get the plugin list
       NodeList pluginList = mimeType.getElementsByTagName("plugin");
-      
+
       // iterate through the plugins, add them in order read
       // OR if they have a special order="" attribute, then hold those in
       // a separate list, and then insert them into the final list at the
       // order specified
       if (pluginList != null && pluginList.getLength() > 0) {
         List<String> plugList = new ArrayList<String>(pluginList.getLength());
-        
-        for (int j = 0; j<pluginList.getLength(); j++) {
+
+        for (int j = 0; j < pluginList.getLength(); j++) {
           Element plugin = (Element) pluginList.item(j);
           String pluginId = plugin.getAttribute("id");
           String extId = aliases.get(pluginId);
@@ -163,110 +162,110 @@
             plugList.add(extId);
           }
         }
-        
+
         // now add the plugin list and map it to this mimeType
         pList.setPluginList(mimeTypeStr, plugList);
-        
+
       } else if (LOG.isWarnEnabled()) {
         LOG.warn("ParsePluginsReader:ERROR:no plugins defined for mime type: "
-                 + mimeTypeStr + ", continuing parse");
+            + mimeTypeStr + ", continuing parse");
       }
     }
     return pList;
   }
-  
+
   /**
    * Tests parsing of the parse-plugins.xml file. An alternative name for the
-   * file can be specified via the <code>--file</code> option, although the
-   * file must be located in the <code>$NUTCH_HOME/conf</code> directory.
-   *
+   * file can be specified via the <code>--file</code> option, although the file
+   * must be located in the <code>$NUTCH_HOME/conf</code> directory.
+   * 
    * @param args
-   *            Currently only the --file argument to specify an alternative
-   *            name for the parse-plugins.xml file is supported.
+   *          Currently only the --file argument to specify an alternative name
+   *          for the parse-plugins.xml file is supported.
    */
   public static void main(String[] args) throws Exception {
     String parsePluginFile = null;
     String usage = "ParsePluginsReader [--file <parse plugin file location>]";
-    
-    if (( args.length != 0 && args.length != 2 )
+
+    if ((args.length != 0 && args.length != 2)
         || (args.length == 2 && !"--file".equals(args[0]))) {
       System.err.println(usage);
       System.exit(1);
     }
-    
+
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("--file")) {
         parsePluginFile = args[++i];
       }
     }
-    
+
     ParsePluginsReader reader = new ParsePluginsReader();
-    
+
     if (parsePluginFile != null) {
       reader.setFParsePluginsFile(parsePluginFile);
     }
-    
+
     ParsePluginList prefs = reader.parse(NutchConfiguration.create());
-    
+
     for (String mimeType : prefs.getSupportedMimeTypes()) {
-      
+
       System.out.println("MIMETYPE: " + mimeType);
       List<String> plugList = prefs.getPluginList(mimeType);
-      
+
       System.out.println("EXTENSION IDs:");
-      
+
       for (String j : plugList) {
         System.out.println(j);
       }
     }
-    
+
   }
-  
+
   /**
    * @return Returns the fParsePluginsFile.
    */
   public String getFParsePluginsFile() {
     return fParsePluginsFile;
   }
-  
+
   /**
    * @param parsePluginsFile
-   *            The fParsePluginsFile to set.
+   *          The fParsePluginsFile to set.
    */
   public void setFParsePluginsFile(String parsePluginsFile) {
     fParsePluginsFile = parsePluginsFile;
   }
-  
+
   private Map<String, String> getAliases(Element parsePluginsRoot) {
 
     Map<String, String> aliases = new HashMap<String, String>();
     NodeList aliasRoot = parsePluginsRoot.getElementsByTagName("aliases");
-	  
+
     if (aliasRoot == null || (aliasRoot != null && aliasRoot.getLength() == 0)) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("No aliases defined in parse-plugins.xml!");
       }
       return aliases;
     }
-	  
+
     if (aliasRoot.getLength() > 1) {
       // log a warning, but try and continue processing
       if (LOG.isWarnEnabled()) {
         LOG.warn("There should only be one \"aliases\" tag in parse-plugins.xml");
       }
     }
-	  
-    Element aliasRootElem = (Element)aliasRoot.item(0);
+
+    Element aliasRootElem = (Element) aliasRoot.item(0);
     NodeList aliasElements = aliasRootElem.getElementsByTagName("alias");
-	  
+
     if (aliasElements != null && aliasElements.getLength() > 0) {
-      for (int i=0; i<aliasElements.getLength(); i++) {
-        Element aliasElem = (Element)aliasElements.item(i);
-	String parsePluginId = aliasElem.getAttribute("name");
-	String extensionId = aliasElem.getAttribute("extension-id");
+      for (int i = 0; i < aliasElements.getLength(); i++) {
+        Element aliasElem = (Element) aliasElements.item(i);
+        String parsePluginId = aliasElem.getAttribute("name");
+        String extensionId = aliasElem.getAttribute("extension-id");
         if (LOG.isTraceEnabled()) {
-          LOG.trace("Found alias: plugin-id: " + parsePluginId +
-                    ", extension-id: " + extensionId);
+          LOG.trace("Found alias: plugin-id: " + parsePluginId
+              + ", extension-id: " + extensionId);
         }
         if (parsePluginId != null && extensionId != null) {
           aliases.put(parsePluginId, extensionId);
@@ -275,5 +274,5 @@
     }
     return aliases;
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/ParseData.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseData.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseData.java	(working copy)
@@ -30,8 +30,9 @@
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.util.NutchConfiguration;
 
-
-/** Data extracted from a page's content.
+/**
+ * Data extracted from a page's content.
+ * 
  * @see Parse#getData()
  */
 public final class ParseData extends VersionedWritable {
@@ -45,19 +46,19 @@
   private Metadata parseMeta;
   private ParseStatus status;
   private byte version = VERSION;
-  
+
   public ParseData() {
     contentMeta = new Metadata();
     parseMeta = new Metadata();
   }
 
   public ParseData(ParseStatus status, String title, Outlink[] outlinks,
-                   Metadata contentMeta) {
+      Metadata contentMeta) {
     this(status, title, outlinks, contentMeta, new Metadata());
   }
-  
+
   public ParseData(ParseStatus status, String title, Outlink[] outlinks,
-                   Metadata contentMeta, Metadata parseMeta) {
+      Metadata contentMeta, Metadata parseMeta) {
     this.status = status;
     this.title = title;
     this.outlinks = outlinks;
@@ -70,33 +71,43 @@
   //
 
   /** The status of parsing the page. */
-  public ParseStatus getStatus() { return status; }
-  
+  public ParseStatus getStatus() {
+    return status;
+  }
+
   /** The title of the page. */
-  public String getTitle() { return title; }
+  public String getTitle() {
+    return title;
+  }
 
   /** The outlinks of the page. */
-  public Outlink[] getOutlinks() { return outlinks; }
+  public Outlink[] getOutlinks() {
+    return outlinks;
+  }
 
   /** The original Metadata retrieved from content */
-  public Metadata getContentMeta() { return contentMeta; }
+  public Metadata getContentMeta() {
+    return contentMeta;
+  }
 
   /**
-   * Other content properties.
-   * This is the place to find format-specific properties.
-   * Different parser implementations for different content types will populate
-   * this differently.
+   * Other content properties. This is the place to find format-specific
+   * properties. Different parser implementations for different content types
+   * will populate this differently.
    */
-  public Metadata getParseMeta() { return parseMeta; }
-  
+  public Metadata getParseMeta() {
+    return parseMeta;
+  }
+
   public void setParseMeta(Metadata parseMeta) {
     this.parseMeta = parseMeta;
   }
-  
+
   /**
-   * Get a metadata single value.
-   * This method first looks for the metadata value in the parse metadata. If no
-   * value is found it the looks for the metadata in the content metadata.
+   * Get a single metadata value. This method first looks for the metadata
+   * value in the parse metadata. If no value is found it then looks for the
+   * metadata in the content metadata.
+   * 
    * @see #getContentMeta()
    * @see #getParseMeta()
    */
@@ -107,12 +118,14 @@
     }
     return value;
   }
-  
+
   //
   // Writable methods
   //
 
-  public byte getVersion() { return version; }
+  public byte getVersion() {
+    return version;
+  }
 
   public final void readFields(DataInput in) throws IOException {
 
@@ -121,16 +134,16 @@
     if (version != VERSION)
       throw new VersionMismatchException(VERSION, version);
     status = ParseStatus.read(in);
-    title = Text.readString(in);                   // read title
+    title = Text.readString(in); // read title
 
-    int numOutlinks = in.readInt();    
+    int numOutlinks = in.readInt();
     outlinks = new Outlink[numOutlinks];
     for (int i = 0; i < numOutlinks; i++) {
       outlinks[i] = Outlink.read(in);
     }
-    
+
     if (version < 3) {
-      int propertyCount = in.readInt();             // read metadata
+      int propertyCount = in.readInt(); // read metadata
       contentMeta.clear();
       for (int i = 0; i < propertyCount; i++) {
         contentMeta.add(Text.readString(in), Text.readString(in));
@@ -146,15 +159,15 @@
   }
 
   public final void write(DataOutput out) throws IOException {
-    out.writeByte(VERSION);                       // write version
-    status.write(out);                            // write status
-    Text.writeString(out, title);                 // write title
+    out.writeByte(VERSION); // write version
+    status.write(out); // write status
+    Text.writeString(out, title); // write title
 
-    out.writeInt(outlinks.length);                // write outlinks
+    out.writeInt(outlinks.length); // write outlinks
     for (int i = 0; i < outlinks.length; i++) {
       outlinks[i].write(out);
     }
-    contentMeta.write(out);                      // write content metadata
+    contentMeta.write(out); // write content metadata
     parseMeta.write(out);
   }
 
@@ -171,38 +184,36 @@
   public boolean equals(Object o) {
     if (!(o instanceof ParseData))
       return false;
-    ParseData other = (ParseData)o;
-    return
-      this.status.equals(other.status) &&
-      this.title.equals(other.title) &&
-      Arrays.equals(this.outlinks, other.outlinks) &&
-      this.contentMeta.equals(other.contentMeta) &&
-      this.parseMeta.equals(other.parseMeta);
+    ParseData other = (ParseData) o;
+    return this.status.equals(other.status) && this.title.equals(other.title)
+        && Arrays.equals(this.outlinks, other.outlinks)
+        && this.contentMeta.equals(other.contentMeta)
+        && this.parseMeta.equals(other.parseMeta);
   }
 
   public String toString() {
     StringBuffer buffer = new StringBuffer();
 
-    buffer.append("Version: " + version + "\n" );
-    buffer.append("Status: " + status + "\n" );
-    buffer.append("Title: " + title + "\n" );
+    buffer.append("Version: " + version + "\n");
+    buffer.append("Status: " + status + "\n");
+    buffer.append("Title: " + title + "\n");
 
     if (outlinks != null) {
-      buffer.append("Outlinks: " + outlinks.length + "\n" );
+      buffer.append("Outlinks: " + outlinks.length + "\n");
       for (int i = 0; i < outlinks.length; i++) {
         buffer.append("  outlink: " + outlinks[i] + "\n");
       }
     }
 
-    buffer.append("Content Metadata: " + contentMeta + "\n" );
-    buffer.append("Parse Metadata: " + parseMeta + "\n" );
+    buffer.append("Content Metadata: " + contentMeta + "\n");
+    buffer.append("Parse Metadata: " + parseMeta + "\n");
 
     return buffer.toString();
   }
 
   public static void main(String argv[]) throws Exception {
     String usage = "ParseData (-local | -dfs <namenode:port>) recno segment";
-    
+
     if (argv.length < 3) {
       System.out.println("usage:" + usage);
       return;
@@ -210,13 +221,12 @@
 
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
     FileSystem fs = FileSystem.get(conf);
-    
+
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
       String segment = remainingArgs[1];
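
A brief sketch of the metadata lookup order described in the javadoc above (parse metadata first, content metadata as fallback, via ParseData#getMeta); the class name and the charset values are purely illustrative.

import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseStatus;

public class ParseDataMetaSketch {
  public static void main(String[] args) {
    Metadata contentMeta = new Metadata();
    contentMeta.add("charset", "ISO-8859-1");

    Metadata parseMeta = new Metadata();
    parseMeta.add("charset", "UTF-8");

    ParseData data = new ParseData(ParseStatus.STATUS_SUCCESS, "A title",
        new Outlink[0], contentMeta, parseMeta);

    // The parse metadata wins over the content metadata, so this prints UTF-8.
    System.out.println(data.getMeta("charset"));
  }
}
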
Index: src/java/org/apache/nutch/parse/ParseOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseOutputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseOutputFormat.java	(working copy)
@@ -45,25 +45,26 @@
 
 /* Parse content in a segment. */
 public class ParseOutputFormat implements OutputFormat<Text, Parse> {
-  private static final Logger LOG = LoggerFactory.getLogger(ParseOutputFormat.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(ParseOutputFormat.class);
 
   private URLFilters filters;
   private URLNormalizers normalizers;
   private ScoringFilters scfilters;
-  
+
   private static class SimpleEntry implements Entry<Text, CrawlDatum> {
     private Text key;
     private CrawlDatum value;
-    
+
     public SimpleEntry(Text key, CrawlDatum value) {
       this.key = key;
       this.value = value;
     }
-    
+
     public Text getKey() {
       return key;
     }
-    
+
     public CrawlDatum getValue() {
       return value;
     }
@@ -75,215 +76,219 @@
   }
 
   public void checkOutputSpecs(FileSystem fs, JobConf job) throws IOException {
-      Path out = FileOutputFormat.getOutputPath(job);
-      if ((out == null) && (job.getNumReduceTasks() != 0)) {
-          throw new InvalidJobConfException(
-                  "Output directory not set in JobConf.");
-      }
-      if (fs == null) {
-          fs = out.getFileSystem(job);
-      }
-      if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
-          throw new IOException("Segment already parsed!");
+    Path out = FileOutputFormat.getOutputPath(job);
+    if ((out == null) && (job.getNumReduceTasks() != 0)) {
+      throw new InvalidJobConfException("Output directory not set in JobConf.");
+    }
+    if (fs == null) {
+      fs = out.getFileSystem(job);
+    }
+    if (fs.exists(new Path(out, CrawlDatum.PARSE_DIR_NAME)))
+      throw new IOException("Segment already parsed!");
   }
 
   public RecordWriter<Text, Parse> getRecordWriter(FileSystem fs, JobConf job,
-                                      String name, Progressable progress) throws IOException {
+      String name, Progressable progress) throws IOException {
 
     this.filters = new URLFilters(job);
     this.normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_OUTLINK);
     this.scfilters = new ScoringFilters(job);
     final int interval = job.getInt("db.fetch.interval.default", 2592000);
-    final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false);
+    final boolean ignoreExternalLinks = job.getBoolean(
+        "db.ignore.external.links", false);
     int maxOutlinksPerPage = job.getInt("db.max.outlinks.per.page", 100);
     final int maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE
-                                                     : maxOutlinksPerPage;
-    final CompressionType compType = SequenceFileOutputFormat.getOutputCompressionType(job);
+        : maxOutlinksPerPage;
+    final CompressionType compType = SequenceFileOutputFormat
+        .getOutputCompressionType(job);
     Path out = FileOutputFormat.getOutputPath(job);
-    
+
     Path text = new Path(new Path(out, ParseText.DIR_NAME), name);
     Path data = new Path(new Path(out, ParseData.DIR_NAME), name);
     Path crawl = new Path(new Path(out, CrawlDatum.PARSE_DIR_NAME), name);
-    
-    final String[] parseMDtoCrawlDB = job.get("db.parsemeta.to.crawldb","").split(" *, *");
-    
-    final MapFile.Writer textOut =
-      new MapFile.Writer(job, fs, text.toString(), Text.class, ParseText.class,
-          CompressionType.RECORD, progress);
-    
-    final MapFile.Writer dataOut =
-      new MapFile.Writer(job, fs, data.toString(), Text.class, ParseData.class,
-          compType, progress);
-    
-    final SequenceFile.Writer crawlOut =
-      SequenceFile.createWriter(fs, job, crawl, Text.class, CrawlDatum.class,
-          compType, progress);
-    
+
+    final String[] parseMDtoCrawlDB = job.get("db.parsemeta.to.crawldb", "")
+        .split(" *, *");
+
+    final MapFile.Writer textOut = new MapFile.Writer(job, fs, text.toString(),
+        Text.class, ParseText.class, CompressionType.RECORD, progress);
+
+    final MapFile.Writer dataOut = new MapFile.Writer(job, fs, data.toString(),
+        Text.class, ParseData.class, compType, progress);
+
+    final SequenceFile.Writer crawlOut = SequenceFile.createWriter(fs, job,
+        crawl, Text.class, CrawlDatum.class, compType, progress);
+
     return new RecordWriter<Text, Parse>() {
 
+      public void write(Text key, Parse parse) throws IOException {
 
-        public void write(Text key, Parse parse)
-          throws IOException {
-          
-          String fromUrl = key.toString();
-          String fromHost = null; 
-          String toHost = null;          
-          textOut.append(key, new ParseText(parse.getText()));
-          
-          ParseData parseData = parse.getData();
-          // recover the signature prepared by Fetcher or ParseSegment
-          String sig = parseData.getContentMeta().get(Nutch.SIGNATURE_KEY);
-          if (sig != null) {
-            byte[] signature = StringUtil.fromHexString(sig);
-            if (signature != null) {
-              // append a CrawlDatum with a signature
-              CrawlDatum d = new CrawlDatum(CrawlDatum.STATUS_SIGNATURE, 0);
-              d.setSignature(signature);
-              crawlOut.append(key, d);
-            }
+        String fromUrl = key.toString();
+        String fromHost = null;
+        String toHost = null;
+        textOut.append(key, new ParseText(parse.getText()));
+
+        ParseData parseData = parse.getData();
+        // recover the signature prepared by Fetcher or ParseSegment
+        String sig = parseData.getContentMeta().get(Nutch.SIGNATURE_KEY);
+        if (sig != null) {
+          byte[] signature = StringUtil.fromHexString(sig);
+          if (signature != null) {
+            // append a CrawlDatum with a signature
+            CrawlDatum d = new CrawlDatum(CrawlDatum.STATUS_SIGNATURE, 0);
+            d.setSignature(signature);
+            crawlOut.append(key, d);
           }
-          
+        }
+
         // see if the parse metadata contain things that we'd like
         // to pass to the metadata of the crawlDB entry
         CrawlDatum parseMDCrawlDatum = null;
         for (String mdname : parseMDtoCrawlDB) {
           String mdvalue = parse.getData().getParseMeta().get(mdname);
           if (mdvalue != null) {
-            if (parseMDCrawlDatum == null) parseMDCrawlDatum = new CrawlDatum(
-                CrawlDatum.STATUS_PARSE_META, 0);
+            if (parseMDCrawlDatum == null)
+              parseMDCrawlDatum = new CrawlDatum(CrawlDatum.STATUS_PARSE_META,
+                  0);
             parseMDCrawlDatum.getMetaData().put(new Text(mdname),
                 new Text(mdvalue));
           }
         }
-        if (parseMDCrawlDatum != null) crawlOut.append(key, parseMDCrawlDatum);
+        if (parseMDCrawlDatum != null)
+          crawlOut.append(key, parseMDCrawlDatum);
 
-          try {
-            ParseStatus pstatus = parseData.getStatus();
-            if (pstatus != null && pstatus.isSuccess() &&
-                pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
-              String newUrl = pstatus.getMessage();
-              int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
-              try {
-                newUrl = normalizers.normalize(newUrl,
-                    URLNormalizers.SCOPE_FETCHER);
-              } catch (MalformedURLException mfue) {
-                newUrl = null;
+        try {
+          ParseStatus pstatus = parseData.getStatus();
+          if (pstatus != null && pstatus.isSuccess()
+              && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+            String newUrl = pstatus.getMessage();
+            int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
+            try {
+              newUrl = normalizers.normalize(newUrl,
+                  URLNormalizers.SCOPE_FETCHER);
+            } catch (MalformedURLException mfue) {
+              newUrl = null;
+            }
+            if (newUrl != null)
+              newUrl = filters.filter(newUrl);
+            String url = key.toString();
+            if (newUrl != null && !newUrl.equals(url)) {
+              String reprUrl = URLUtil.chooseRepr(url, newUrl,
+                  refreshTime < Fetcher.PERM_REFRESH_TIME);
+              CrawlDatum newDatum = new CrawlDatum();
+              newDatum.setStatus(CrawlDatum.STATUS_LINKED);
+              if (reprUrl != null && !reprUrl.equals(newUrl)) {
+                newDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY,
+                    new Text(reprUrl));
               }
-              if (newUrl != null) newUrl = filters.filter(newUrl);
-              String url = key.toString();
-              if (newUrl != null && !newUrl.equals(url)) {
-                String reprUrl =
-                  URLUtil.chooseRepr(url, newUrl,
-                                     refreshTime < Fetcher.PERM_REFRESH_TIME);
-                CrawlDatum newDatum = new CrawlDatum();
-                newDatum.setStatus(CrawlDatum.STATUS_LINKED);
-                if (reprUrl != null && !reprUrl.equals(newUrl)) {
-                  newDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY,
-                                             new Text(reprUrl));
-                }
-                crawlOut.append(new Text(newUrl), newDatum);
-              }
+              crawlOut.append(new Text(newUrl), newDatum);
             }
-          } catch (URLFilterException e) {
-            // ignore
           }
+        } catch (URLFilterException e) {
+          // ignore
+        }
 
-          // collect outlinks for subsequent db update
-          Outlink[] links = parseData.getOutlinks();
-          int outlinksToStore = Math.min(maxOutlinks, links.length);
+        // collect outlinks for subsequent db update
+        Outlink[] links = parseData.getOutlinks();
+        int outlinksToStore = Math.min(maxOutlinks, links.length);
+        if (ignoreExternalLinks) {
+          try {
+            fromHost = new URL(fromUrl).getHost().toLowerCase();
+          } catch (MalformedURLException e) {
+            fromHost = null;
+          }
+        } else {
+          fromHost = null;
+        }
+
+        int validCount = 0;
+        CrawlDatum adjust = null;
+        List<Entry<Text, CrawlDatum>> targets = new ArrayList<Entry<Text, CrawlDatum>>(
+            outlinksToStore);
+        List<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
+        for (int i = 0; i < links.length && validCount < outlinksToStore; i++) {
+          String toUrl = links[i].getToUrl();
+          // ignore links to self (or anchors within the page)
+          if (fromUrl.equals(toUrl)) {
+            continue;
+          }
           if (ignoreExternalLinks) {
             try {
-              fromHost = new URL(fromUrl).getHost().toLowerCase();
+              toHost = new URL(toUrl).getHost().toLowerCase();
             } catch (MalformedURLException e) {
-              fromHost = null;
+              toHost = null;
             }
-          } else {
-            fromHost = null;
+            if (toHost == null || !toHost.equals(fromHost)) { // external links
+              continue; // skip it
+            }
           }
-
-          int validCount = 0;
-          CrawlDatum adjust = null;
-          List<Entry<Text, CrawlDatum>> targets = new ArrayList<Entry<Text, CrawlDatum>>(outlinksToStore);
-          List<Outlink> outlinkList = new ArrayList<Outlink>(outlinksToStore);
-          for (int i = 0; i < links.length && validCount < outlinksToStore; i++) {
-            String toUrl = links[i].getToUrl();
-            // ignore links to self (or anchors within the page)
-            if (fromUrl.equals(toUrl)) {
+          try {
+            toUrl = normalizers.normalize(toUrl, URLNormalizers.SCOPE_OUTLINK); // normalize
+                                                                                // the
+                                                                                // url
+            toUrl = filters.filter(toUrl); // filter the url
+            if (toUrl == null) {
               continue;
             }
-            if (ignoreExternalLinks) {
-              try {
-                toHost = new URL(toUrl).getHost().toLowerCase();
-              } catch (MalformedURLException e) {
-                toHost = null;
-              }
-              if (toHost == null || !toHost.equals(fromHost)) { // external links
-                continue; // skip it
-              }
-            }
-            try {
-              toUrl = normalizers.normalize(toUrl,
-                          URLNormalizers.SCOPE_OUTLINK); // normalize the url
-              toUrl = filters.filter(toUrl);   // filter the url
-              if (toUrl == null) {
-                continue;
-              }
-            } catch (Exception e) {
-              continue;
-            }
-            CrawlDatum target = new CrawlDatum(CrawlDatum.STATUS_LINKED, interval);
-            Text targetUrl = new Text(toUrl);
-            try {
-              scfilters.initialScore(targetUrl, target);
-            } catch (ScoringFilterException e) {
-              LOG.warn("Cannot filter init score for url " + key +
-                       ", using default: " + e.getMessage());
-              target.setScore(0.0f);
-            }
-            
-            targets.add(new SimpleEntry(targetUrl, target));
-            outlinkList.add(links[i]);
-            validCount++;
+          } catch (Exception e) {
+            continue;
           }
+          CrawlDatum target = new CrawlDatum(CrawlDatum.STATUS_LINKED, interval);
+          Text targetUrl = new Text(toUrl);
           try {
-            // compute score contributions and adjustment to the original score
-            adjust = scfilters.distributeScoreToOutlinks((Text)key, parseData, 
-                      targets, null, links.length);
+            scfilters.initialScore(targetUrl, target);
           } catch (ScoringFilterException e) {
-            LOG.warn("Cannot distribute score from " + key + ": " + e.getMessage());
+            LOG.warn("Cannot filter init score for url " + key
+                + ", using default: " + e.getMessage());
+            target.setScore(0.0f);
           }
-          for (Entry<Text, CrawlDatum> target : targets) {
-            crawlOut.append(target.getKey(), target.getValue());
-          }
-          if (adjust != null) crawlOut.append(key, adjust);
 
-          Outlink[] filteredLinks = outlinkList.toArray(new Outlink[outlinkList.size()]);
-          parseData = new ParseData(parseData.getStatus(), parseData.getTitle(), 
-                                    filteredLinks, parseData.getContentMeta(), 
-                                    parseData.getParseMeta());
-          dataOut.append(key, parseData);
-          if (!parse.isCanonical()) {
-            CrawlDatum datum = new CrawlDatum();
-            datum.setStatus(CrawlDatum.STATUS_FETCH_SUCCESS);
-            String timeString = parse.getData().getContentMeta().get(Nutch.FETCH_TIME_KEY);
-            try {
-              datum.setFetchTime(Long.parseLong(timeString));
-            } catch (Exception e) {
-              LOG.warn("Can't read fetch time for: " + key);
-              datum.setFetchTime(System.currentTimeMillis());
-            }
-            crawlOut.append(key, datum);
+          targets.add(new SimpleEntry(targetUrl, target));
+          outlinkList.add(links[i]);
+          validCount++;
+        }
+        try {
+          // compute score contributions and adjustment to the original score
+          adjust = scfilters.distributeScoreToOutlinks((Text) key, parseData,
+              targets, null, links.length);
+        } catch (ScoringFilterException e) {
+          LOG.warn("Cannot distribute score from " + key + ": "
+              + e.getMessage());
+        }
+        for (Entry<Text, CrawlDatum> target : targets) {
+          crawlOut.append(target.getKey(), target.getValue());
+        }
+        if (adjust != null)
+          crawlOut.append(key, adjust);
+
+        Outlink[] filteredLinks = outlinkList.toArray(new Outlink[outlinkList
+            .size()]);
+        parseData = new ParseData(parseData.getStatus(), parseData.getTitle(),
+            filteredLinks, parseData.getContentMeta(), parseData.getParseMeta());
+        dataOut.append(key, parseData);
+        if (!parse.isCanonical()) {
+          CrawlDatum datum = new CrawlDatum();
+          datum.setStatus(CrawlDatum.STATUS_FETCH_SUCCESS);
+          String timeString = parse.getData().getContentMeta()
+              .get(Nutch.FETCH_TIME_KEY);
+          try {
+            datum.setFetchTime(Long.parseLong(timeString));
+          } catch (Exception e) {
+            LOG.warn("Can't read fetch time for: " + key);
+            datum.setFetchTime(System.currentTimeMillis());
           }
+          crawlOut.append(key, datum);
         }
-        
-        public void close(Reporter reporter) throws IOException {
-          textOut.close();
-          dataOut.close();
-          crawlOut.close();
-        }
-        
-      };
-    
+      }
+
+      public void close(Reporter reporter) throws IOException {
+        textOut.close();
+        dataOut.close();
+        crawlOut.close();
+      }
+
+    };
+
   }
 
 }
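
As a standalone illustration of the db.ignore.external.links check performed inside the record writer above (hosts are lower-cased before comparison, and outlinks whose URL cannot be parsed are skipped), here is a sketch using plain java.net.URL; the helper and its class name are hypothetical.

import java.net.MalformedURLException;
import java.net.URL;

public class ExternalLinkCheckSketch {

  // Mirrors the host comparison in ParseOutputFormat: an outlink is kept only
  // when its lower-cased host equals the lower-cased host of the source page.
  static boolean isInternal(String fromUrl, String toUrl) {
    try {
      String fromHost = new URL(fromUrl).getHost().toLowerCase();
      String toHost = new URL(toUrl).getHost().toLowerCase();
      return toHost.equals(fromHost);
    } catch (MalformedURLException e) {
      return false; // unparsable URLs are treated as external and skipped
    }
  }

  public static void main(String[] args) {
    System.out.println(isInternal("http://example.com/a", "http://EXAMPLE.com/b")); // true
    System.out.println(isInternal("http://example.com/a", "http://other.org/"));    // false
  }
}
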
Index: src/java/org/apache/nutch/parse/HtmlParseFilter.java
===================================================================
--- src/java/org/apache/nutch/parse/HtmlParseFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/HtmlParseFilter.java	(working copy)
@@ -27,16 +27,19 @@
 import org.apache.nutch.plugin.Pluggable;
 import org.apache.nutch.protocol.Content;
 
-
-/** Extension point for DOM-based HTML parsers.  Permits one to add additional
- * metadata to HTML parses.  All plugins found which implement this extension
+/**
+ * Extension point for DOM-based HTML parsers. Permits one to add additional
+ * metadata to HTML parses. All plugins found which implement this extension
  * point are run sequentially on the parse.
  */
 public interface HtmlParseFilter extends Pluggable, Configurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = HtmlParseFilter.class.getName();
 
-  /** Adds metadata or otherwise modifies a parse of HTML content, given
-   * the DOM tree of a page. */
-  ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc);
+  /**
+   * Adds metadata or otherwise modifies a parse of HTML content, given the DOM
+   * tree of a page.
+   */
+  ParseResult filter(Content content, ParseResult parseResult,
+      HTMLMetaTags metaTags, DocumentFragment doc);
 }
Index: src/java/org/apache/nutch/parse/ParseUtil.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseUtil.java	(working copy)
@@ -27,124 +27,133 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.protocol.Content;
 
-
 /**
  * A Utility class containing methods to simply perform parsing utilities such
  * as iterating through a preferred list of {@link Parser}s to obtain
  * {@link Parse} objects.
- *
+ * 
  * @author mattmann
  * @author J&eacute;r&ocirc;me Charron
  * @author S&eacute;bastien Le Callonnec
  */
 public class ParseUtil {
-  
+
   /* our log stream */
   public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);
   private ParserFactory parserFactory;
   /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
   private int MAX_PARSE_TIME = 30;
-  
+
   /**
    * 
    * @param conf
    */
   public ParseUtil(Configuration conf) {
     this.parserFactory = new ParserFactory(conf);
-    MAX_PARSE_TIME=conf.getInt("parser.timeout", 30);
+    MAX_PARSE_TIME = conf.getInt("parser.timeout", 30);
   }
-  
+
   /**
    * Performs a parse by iterating through a List of preferred {@link Parser}s
    * until a successful parse is performed and a {@link Parse} object is
    * returned. If the parse is unsuccessful, a message is logged to the
    * <code>WARNING</code> level, and an empty parse is returned.
-   *
-   * @param content The content to try and parse.
+   * 
+   * @param content
+   *          The content to try and parse.
    * @return &lt;key, {@link Parse}&gt; pairs.
-   * @throws ParseException If no suitable parser is found to perform the parse.
+   * @throws ParseException
+   *           If no suitable parser is found to perform the parse.
    */
   public ParseResult parse(Content content) throws ParseException {
     Parser[] parsers = null;
-    
+
     try {
-      parsers = this.parserFactory.getParsers(content.getContentType(), 
-	         content.getUrl() != null ? content.getUrl():"");
+      parsers = this.parserFactory.getParsers(content.getContentType(),
+          content.getUrl() != null ? content.getUrl() : "");
     } catch (ParserNotFound e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
-               " of type " + content.getContentType());
+        LOG.warn("No suitable parser found when trying to parse content "
+            + content.getUrl() + " of type " + content.getContentType());
       }
       throw new ParseException(e.getMessage());
     }
-    
+
     ParseResult parseResult = null;
-    for (int i=0; i<parsers.length; i++) {
+    for (int i = 0; i < parsers.length; i++) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
+        LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i]
+            + "]");
       }
-      if (MAX_PARSE_TIME!=-1)
-      	parseResult = runParser(parsers[i], content);
-      else 
-      	parseResult = parsers[i].getParse(content);
+      if (MAX_PARSE_TIME != -1)
+        parseResult = runParser(parsers[i], content);
+      else
+        parseResult = parsers[i].getParse(content);
 
       if (parseResult != null && !parseResult.isEmpty())
         return parseResult;
     }
-   
-    if (LOG.isWarnEnabled()) { 
-      LOG.warn("Unable to successfully parse content " + content.getUrl() +
-               " of type " + content.getContentType());
+
+    if (LOG.isWarnEnabled()) {
+      LOG.warn("Unable to successfully parse content " + content.getUrl()
+          + " of type " + content.getContentType());
     }
-    return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
+    return new ParseStatus(new ParseException(
+        "Unable to successfully parse content")).getEmptyParseResult(
+        content.getUrl(), null);
   }
-    
+
   /**
    * Method parses a {@link Content} object using the {@link Parser} specified
-   * by the parameter <code>extId</code>, i.e., the Parser's extension ID.
-   * If a suitable {@link Parser} is not found, then a <code>WARNING</code>
-   * level message is logged, and a ParseException is thrown. If the parse is
-   * uncessful for any other reason, then a <code>WARNING</code> level
-   * message is logged, and a <code>ParseStatus.getEmptyParse()</code> is
-   * returned.
-   *
-   * @param extId The extension implementation ID of the {@link Parser} to use
-   *              to parse the specified content.
-   * @param content The content to parse.
-   *
-   * @return &lt;key, {@link Parse}&gt; pairs if the parse is successful, otherwise,
-   *         a single &lt;key, <code>ParseStatus.getEmptyParse()</code>&gt; pair.
-   *
-   * @throws ParseException If there is no suitable {@link Parser} found
-   *                        to perform the parse.
+   * by the parameter <code>extId</code>, i.e., the Parser's extension ID. If a
+   * suitable {@link Parser} is not found, then a <code>WARNING</code> level
+   * message is logged, and a ParseException is thrown. If the parse is
+   * unsuccessful for any other reason, then a <code>WARNING</code> level message
+   * is logged, and a <code>ParseStatus.getEmptyParse()</code> is returned.
+   * 
+   * @param extId
+   *          The extension implementation ID of the {@link Parser} to use to
+   *          parse the specified content.
+   * @param content
+   *          The content to parse.
+   * 
+   * @return &lt;key, {@link Parse}&gt; pairs if the parse is successful,
+   *         otherwise, a single &lt;key,
+   *         <code>ParseStatus.getEmptyParse()</code>&gt; pair.
+   * 
+   * @throws ParseException
+   *           If there is no suitable {@link Parser} found to perform the
+   *           parse.
    */
   public ParseResult parseByExtensionId(String extId, Content content)
-  throws ParseException {
+      throws ParseException {
     Parser p = null;
-    
+
     try {
       p = this.parserFactory.getParserById(extId);
     } catch (ParserNotFound e) {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() +
-            " of type " + content.getContentType());
+        LOG.warn("No suitable parser found when trying to parse content "
+            + content.getUrl() + " of type " + content.getContentType());
       }
       throw new ParseException(e.getMessage());
     }
-    
+
     ParseResult parseResult = null;
-    if (MAX_PARSE_TIME!=-1)
-    	parseResult = runParser(p, content);
-    else 
-    	parseResult = p.getParse(content);
+    if (MAX_PARSE_TIME != -1)
+      parseResult = runParser(p, content);
+    else
+      parseResult = p.getParse(content);
     if (parseResult != null && !parseResult.isEmpty()) {
       return parseResult;
     } else {
       if (LOG.isWarnEnabled()) {
-        LOG.warn("Unable to successfully parse content " + content.getUrl() +
-            " of type " + content.getContentType());
-      }  
-      return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
+        LOG.warn("Unable to successfully parse content " + content.getUrl()
+            + " of type " + content.getContentType());
+      }
+      return new ParseStatus(new ParseException(
+          "Unable to successfully parse content")).getEmptyParseResult(
+          content.getUrl(), null);
     }
   }
 
@@ -168,5 +177,4 @@
     }
     return res;
   }
-  
 }
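
A minimal usage sketch of the utility above, assuming the HTML parser plugin is available on the plugin path; the URL, the HTML snippet and the sketch class name are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.NutchConfiguration;

public class ParseUtilSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = NutchConfiguration.create();
    // parser.timeout (seconds) bounds each parser run; -1 disables the limit.
    conf.setInt("parser.timeout", 30);

    byte[] html = "<html><body>Hello Nutch</body></html>".getBytes("UTF-8");
    Content content = new Content("http://example.com/", "http://example.com/",
        html, "text/html", new Metadata(), conf);

    // Iterates over the parsers registered for text/html until one succeeds.
    ParseResult result = new ParseUtil(conf).parse(content);
    System.out.println(result.get("http://example.com/").getText());
  }
}
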
Index: src/java/org/apache/nutch/parse/ParserNotFound.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserNotFound.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParserNotFound.java	(working copy)
@@ -18,17 +18,17 @@
 
 public class ParserNotFound extends ParseException {
 
-  private static final long serialVersionUID=23993993939L;
+  private static final long serialVersionUID = 23993993939L;
   private String url;
   private String contentType;
 
-  public ParserNotFound(String message){
-    super(message);    
+  public ParserNotFound(String message) {
+    super(message);
   }
-  
+
   public ParserNotFound(String url, String contentType) {
-    this(url, contentType,
-         "parser not found for contentType="+contentType+" url="+url);
+    this(url, contentType, "parser not found for contentType=" + contentType
+        + " url=" + url);
   }
 
   public ParserNotFound(String url, String contentType, String message) {
@@ -37,6 +37,11 @@
     this.contentType = contentType;
   }
 
-  public String getUrl() { return url; }
-  public String getContentType() { return contentType; }
+  public String getUrl() {
+    return url;
+  }
+
+  public String getContentType() {
+    return contentType;
+  }
 }
Index: src/java/org/apache/nutch/parse/ParseResult.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseResult.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseResult.java	(working copy)
@@ -27,94 +27,116 @@
 import org.apache.hadoop.io.Text;
 
 /**
- * A utility class that stores result of a parse. Internally
- * a ParseResult stores &lt;{@link Text}, {@link Parse}&gt; pairs.
- * <p>Parsers may return multiple results, which correspond to parts
- * or other associated documents related to the original URL.</p>
- * <p>There will be usually one parse result that corresponds directly
- * to the original URL, and possibly many (or none) results that correspond
- * to derived URLs (or sub-URLs).
+ * A utility class that stores result of a parse. Internally a ParseResult
+ * stores &lt;{@link Text}, {@link Parse}&gt; pairs.
+ * <p>
+ * Parsers may return multiple results, which correspond to parts or other
+ * associated documents related to the original URL.
+ * </p>
+ * <p>
+ * There will be usually one parse result that corresponds directly to the
+ * original URL, and possibly many (or none) results that correspond to derived
+ * URLs (or sub-URLs).
  */
 public class ParseResult implements Iterable<Map.Entry<Text, Parse>> {
   private Map<Text, Parse> parseMap;
   private String originalUrl;
-  
+
   public static final Logger LOG = LoggerFactory.getLogger(ParseResult.class);
-  
+
   /**
    * Create a container for parse results.
-   * @param originalUrl the original url from which all parse results
-   * have been obtained.
+   * 
+   * @param originalUrl
+   *          the original url from which all parse results have been obtained.
    */
   public ParseResult(String originalUrl) {
     parseMap = new HashMap<Text, Parse>();
     this.originalUrl = originalUrl;
   }
-  
+
   /**
    * Convenience method for obtaining {@link ParseResult} from a single
    * {@link Parse} output.
-   * @param url canonical url
-   * @param parse single parse output
+   * 
+   * @param url
+   *          canonical url
+   * @param parse
+   *          single parse output
    * @return result containing the single parse output
    */
   public static ParseResult createParseResult(String url, Parse parse) {
     ParseResult parseResult = new ParseResult(url);
-    parseResult.put(new Text(url), new ParseText(parse.getText()), parse.getData());
+    parseResult.put(new Text(url), new ParseText(parse.getText()),
+        parse.getData());
     return parseResult;
   }
-  
+
   /**
    * Checks whether the result is empty.
+   * 
    * @return
    */
   public boolean isEmpty() {
     return parseMap.isEmpty();
   }
-  
+
   /**
    * Return the number of parse outputs (both successful and failed)
    */
   public int size() {
     return parseMap.size();
   }
-  
+
   /**
    * Retrieve a single parse output.
-   * @param key sub-url under which the parse output is stored.
+   * 
+   * @param key
+   *          sub-url under which the parse output is stored.
    * @return parse output corresponding to this sub-url, or null.
    */
   public Parse get(String key) {
     return get(new Text(key));
   }
-  
+
   /**
    * Retrieve a single parse output.
-   * @param key sub-url under which the parse output is stored.
+   * 
+   * @param key
+   *          sub-url under which the parse output is stored.
    * @return parse output corresponding to this sub-url, or null.
    */
   public Parse get(Text key) {
     return parseMap.get(key);
   }
-  
+
   /**
    * Store a result of parsing.
-   * @param key URL or sub-url of this parse result
-   * @param text plain text result
-   * @param data corresponding parse metadata of this result
+   * 
+   * @param key
+   *          URL or sub-url of this parse result
+   * @param text
+   *          plain text result
+   * @param data
+   *          corresponding parse metadata of this result
    */
   public void put(Text key, ParseText text, ParseData data) {
     put(key.toString(), text, data);
   }
-  
+
   /**
    * Store a result of parsing.
-   * @param key URL or sub-url of this parse result
-   * @param text plain text result
-   * @param data corresponding parse metadata of this result
+   * 
+   * @param key
+   *          URL or sub-url of this parse result
+   * @param text
+   *          plain text result
+   * @param data
+   *          corresponding parse metadata of this result
    */
   public void put(String key, ParseText text, ParseData data) {
-    parseMap.put(new Text(key), new ParseImpl(text, data, key.equals(originalUrl)));
+    parseMap.put(new Text(key),
+        new ParseImpl(text, data, key.equals(originalUrl)));
   }
 
   /**
@@ -123,21 +145,21 @@
   public Iterator<Entry<Text, Parse>> iterator() {
     return parseMap.entrySet().iterator();
   }
-  
+
   /**
-   * Remove all results where status is not successful (as determined
-   * by {@link ParseStatus#isSuccess()}). Note that effects of this operation
+   * Remove all results where status is not successful (as determined by
+   * {@link ParseStatus#isSuccess()}). Note that effects of this operation
    * cannot be reversed.
    */
   public void filter() {
-    for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
+    for (Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
       Entry<Text, Parse> entry = i.next();
       if (!entry.getValue().getData().getStatus().isSuccess()) {
         LOG.warn(entry.getKey() + " is not parsed successfully, filtering");
         i.remove();
       }
     }
-      
+
   }
 
   /**
@@ -145,7 +167,7 @@
    * Parse success is determined by {@link ParseStatus#isSuccess()}
    */
   public boolean isSuccess() {
-    for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
+    for (Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {
       Entry<Text, Parse> entry = i.next();
       if (!entry.getValue().getData().getStatus().isSuccess()) {
         return false;
Index: src/java/org/apache/nutch/parse/Parse.java
===================================================================
--- src/java/org/apache/nutch/parse/Parse.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/Parse.java	(working copy)
@@ -17,18 +17,22 @@
 
 package org.apache.nutch.parse;
 
-/** The result of parsing a page's raw content.
+/**
+ * The result of parsing a page's raw content.
+ * 
  * @see Parser#getParse(Content)
  */
 public interface Parse {
-  
-  /** The textual content of the page. This is indexed, searched, and used when
-   * generating snippets.*/ 
+
+  /**
+   * The textual content of the page. This is indexed, searched, and used when
+   * generating snippets.
+   */
   String getText();
 
   /** Other data extracted from the page. */
   ParseData getData();
-  
+
   /** Indicates if the parse is coming from a url or a sub-url */
   boolean isCanonical();
 }
Index: src/java/org/apache/nutch/parse/ParseStatus.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseStatus.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseStatus.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 /*
  * Created on Apr 28, 2005
  * Author: Andrzej Bialecki &lt;ab@getopt.org&gt;
@@ -32,113 +32,121 @@
 
 import org.apache.nutch.metadata.Metadata;
 
-
 /**
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class ParseStatus implements Writable {
-  
+
   private final static byte VERSION = 2;
-  
+
   // Primary status codes:
-  
+
   /** Parsing was not performed. */
-  public static final byte NOTPARSED       = 0;
+  public static final byte NOTPARSED = 0;
   /** Parsing succeeded. */
-  public static final byte SUCCESS         = 1;
-  /** General failure. There may be a more specific error message in arguments. */
-  public static final byte FAILED          = 2;
-  
-  public static final String[] majorCodes = {
-          "notparsed",
-          "success",
-          "failed"
-  };
-  
+  public static final byte SUCCESS = 1;
+  /**
+   * General failure. There may be a more specific error message in arguments.
+   */
+  public static final byte FAILED = 2;
+
+  public static final String[] majorCodes = { "notparsed", "success", "failed" };
+
   // Secondary success codes go here:
-  
-  /** Parsed content contains a directive to redirect to another URL.
-   * The target URL can be retrieved from the arguments.
+
+  /**
+   * Parsed content contains a directive to redirect to another URL. The target
+   * URL can be retrieved from the arguments.
    */
-  public static final short SUCCESS_REDIRECT          = 100;
-  
+  public static final short SUCCESS_REDIRECT = 100;
+
   // Secondary failure codes go here:
-  
-  /** Parsing failed. An Exception occured (which may be retrieved from the arguments). */
-  public static final short FAILED_EXCEPTION          = 200;
-  /** Parsing failed. Content was truncated, but the parser cannot handle incomplete content. */
-  public static final short FAILED_TRUNCATED          = 202;
-  /** Parsing failed. Invalid format - the content may be corrupted or of wrong type. */
-  public static final short FAILED_INVALID_FORMAT     = 203;
-  /** Parsing failed. Other related parts of the content are needed to complete
+
+  /**
+   * Parsing failed. An Exception occurred (which may be retrieved from the
+   * arguments).
+   */
+  public static final short FAILED_EXCEPTION = 200;
+  /**
+   * Parsing failed. Content was truncated, but the parser cannot handle
+   * incomplete content.
+   */
+  public static final short FAILED_TRUNCATED = 202;
+  /**
+   * Parsing failed. Invalid format - the content may be corrupted or of wrong
+   * type.
+   */
+  public static final short FAILED_INVALID_FORMAT = 203;
+  /**
+   * Parsing failed. Other related parts of the content are needed to complete
    * parsing. The list of URLs to missing parts may be provided in arguments.
    * The Fetcher may decide to fetch these parts at once, then put them into
    * Content.metadata, and supply them for re-parsing.
    */
-  public static final short FAILED_MISSING_PARTS      = 204;
-  /** Parsing failed. There was no content to be parsed - probably caused
-   * by errors at protocol stage.
+  public static final short FAILED_MISSING_PARTS = 204;
+  /**
+   * Parsing failed. There was no content to be parsed - probably caused by
+   * errors at protocol stage.
    */
-  public static final short FAILED_MISSING_CONTENT    = 205;
+  public static final short FAILED_MISSING_CONTENT = 205;
 
-
   public static final ParseStatus STATUS_NOTPARSED = new ParseStatus(NOTPARSED);
   public static final ParseStatus STATUS_SUCCESS = new ParseStatus(SUCCESS);
   public static final ParseStatus STATUS_FAILURE = new ParseStatus(FAILED);
-  
+
   private byte majorCode = 0;
   private short minorCode = 0;
   private String[] args = null;
-  
+
   public byte getVersion() {
     return VERSION;
   }
 
   public ParseStatus() {
-    
+
   }
-  
+
   public ParseStatus(int majorCode, int minorCode, String[] args) {
     this.args = args;
-    this.majorCode = (byte)majorCode;
-    this.minorCode = (short)minorCode;
+    this.majorCode = (byte) majorCode;
+    this.minorCode = (short) minorCode;
   }
-  
+
   public ParseStatus(int majorCode) {
-    this(majorCode, 0, (String[])null);
+    this(majorCode, 0, (String[]) null);
   }
-  
+
   public ParseStatus(int majorCode, String[] args) {
     this(majorCode, 0, args);
   }
-  
+
   public ParseStatus(int majorCode, int minorCode) {
-    this(majorCode, minorCode, (String[])null);
+    this(majorCode, minorCode, (String[]) null);
   }
-  
+
   /** Simplified constructor for passing just a text message. */
   public ParseStatus(int majorCode, int minorCode, String message) {
-    this(majorCode, minorCode, new String[]{message});
+    this(majorCode, minorCode, new String[] { message });
   }
-  
+
   /** Simplified constructor for passing just a text message. */
   public ParseStatus(int majorCode, String message) {
-    this(majorCode, 0, new String[]{message});
+    this(majorCode, 0, new String[] { message });
   }
-  
+
   public ParseStatus(Throwable t) {
-    this(FAILED, FAILED_EXCEPTION, new String[]{t.toString()});
+    this(FAILED, FAILED_EXCEPTION, new String[] { t.toString() });
   }
-  
+
   public static ParseStatus read(DataInput in) throws IOException {
     ParseStatus res = new ParseStatus();
     res.readFields(in);
     return res;
   }
-  
+
   public void readFields(DataInput in) throws IOException {
     byte version = in.readByte();
-    switch(version) {
+    switch (version) {
     case 1:
       majorCode = in.readByte();
       minorCode = in.readShort();
@@ -152,8 +160,8 @@
     default:
       throw new VersionMismatchException(VERSION, version);
     }
- }
-  
+  }
+
   public void write(DataOutput out) throws IOException {
     out.writeByte(VERSION);
     out.writeByte(majorCode);
@@ -164,55 +172,61 @@
       WritableUtils.writeStringArray(out, args);
     }
   }
-  
-  /** A convenience method. Returns true if majorCode is SUCCESS, false
+
+  /**
+   * A convenience method. Returns true if majorCode is SUCCESS, false
    * otherwise.
    */
-  
+
   public boolean isSuccess() {
     return majorCode == SUCCESS;
   }
-  
-  /** A convenience method. Return a String representation of the first
-   * argument, or null.
+
+  /**
+   * A convenience method. Return a String representation of the first argument,
+   * or null.
    */
   public String getMessage() {
     if (args != null && args.length > 0 && args[0] != null)
       return args[0];
     return null;
   }
-  
+
   public String[] getArgs() {
     return args;
   }
-  
+
   public int getMajorCode() {
     return majorCode;
   }
-  
+
   public int getMinorCode() {
     return minorCode;
   }
-  
-  /** A convenience method. Creates an empty Parse instance,
-   * which returns this status.
+
+  /**
+   * A convenience method. Creates an empty Parse instance, which returns this
+   * status.
    */
   public Parse getEmptyParse(Configuration conf) {
     return new EmptyParseImpl(this, conf);
   }
-  
-  /** A convenience method. Creates an empty ParseResult,
-   * which contains this status.
+
+  /**
+   * A convenience method. Creates an empty ParseResult, which contains this
+   * status.
    */
   public ParseResult getEmptyParseResult(String url, Configuration conf) {
     return ParseResult.createParseResult(url, getEmptyParse(conf));
   }
-  
+
   public String toString() {
     StringBuffer res = new StringBuffer();
     String name = null;
-    if (majorCode >= 0 && majorCode < majorCodes.length) name = majorCodes[majorCode];
-    else name = "UNKNOWN!";
+    if (majorCode >= 0 && majorCode < majorCodes.length)
+      name = majorCodes[majorCode];
+    else
+      name = "UNKNOWN!";
     res.append(name + "(" + majorCode + "," + minorCode + ")");
     if (args != null) {
       if (args.length == 1) {
@@ -226,18 +240,18 @@
     }
     return res.toString();
   }
-  
+
   public void setArgs(String[] args) {
     this.args = args;
   }
-  
+
   public void setMessage(String msg) {
     if (args == null || args.length == 0) {
       args = new String[1];
     }
     args[0] = msg;
   }
-  
+
   public void setMajorCode(byte majorCode) {
     this.majorCode = majorCode;
   }
@@ -245,37 +259,45 @@
   public void setMinorCode(short minorCode) {
     this.minorCode = minorCode;
   }
-  
+
   public boolean equals(Object o) {
-    if (o == null) return false;
-    if (!(o instanceof ParseStatus)) return false;
+    if (o == null)
+      return false;
+    if (!(o instanceof ParseStatus))
+      return false;
     boolean res = true;
-    ParseStatus other = (ParseStatus)o;
-    res = res && (this.majorCode == other.majorCode) &&
-      (this.minorCode == other.minorCode);
-    if (!res) return res;
+    ParseStatus other = (ParseStatus) o;
+    res = res && (this.majorCode == other.majorCode)
+        && (this.minorCode == other.minorCode);
+    if (!res)
+      return res;
     if (this.args == null) {
-      if (other.args == null) return true;
-      else return false;
+      if (other.args == null)
+        return true;
+      else
+        return false;
     } else {
-      if (other.args == null) return false;
-      if (other.args.length != this.args.length) return false;
+      if (other.args == null)
+        return false;
+      if (other.args.length != this.args.length)
+        return false;
       for (int i = 0; i < this.args.length; i++) {
-        if (!this.args[i].equals(other.args[i])) return false;
+        if (!this.args[i].equals(other.args[i]))
+          return false;
       }
     }
     return true;
   }
-  
+
   private static class EmptyParseImpl implements Parse {
-    
+
     private ParseData data = null;
-    
+
     public EmptyParseImpl(ParseStatus status, Configuration conf) {
-      data = new ParseData(status, "", new Outlink[0],
-                           new Metadata(), new Metadata());
+      data = new ParseData(status, "", new Outlink[0], new Metadata(),
+          new Metadata());
     }
-    
+
     public ParseData getData() {
       return data;
     }
@@ -283,10 +305,9 @@
     public String getText() {
       return "";
     }
-    
+
     public boolean isCanonical() {
       return true;
     }
   }
 }
-
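
For reference while reviewing the ParseStatus changes above, a minimal usage sketch; it is illustrative only (the exception message and redirect URL are invented) and assumes the public SUCCESS and SUCCESS_REDIRECT constants declared in the class:

    import org.apache.nutch.parse.ParseStatus;

    public class ParseStatusExample {
      public static void main(String[] args) {
        // Failure built from an exception: majorCode FAILED, minorCode FAILED_EXCEPTION.
        ParseStatus failed = new ParseStatus(new RuntimeException("boom"));
        System.out.println(failed.isSuccess());  // false
        System.out.println(failed.getMessage()); // "java.lang.RuntimeException: boom"

        // A redirect discovered while parsing; the target URL travels in the args.
        ParseStatus redirect = new ParseStatus(ParseStatus.SUCCESS,
            ParseStatus.SUCCESS_REDIRECT,
            new String[] { "http://www.example.com/target" });
        System.out.println(redirect.toString());
      }
    }
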
Index: src/java/org/apache/nutch/parse/ParseText.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseText.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseText.java	(working copy)
@@ -33,10 +33,12 @@
 
   private final static byte VERSION = 2;
 
-  public ParseText() {}
+  public ParseText() {
+  }
+
   private String text;
-    
-  public ParseText(String text){
+
+  public ParseText(String text) {
     this.text = text;
   }
 
@@ -68,12 +70,14 @@
   //
   // Accessor methods
   //
-  public String getText()  { return text; }
+  public String getText() {
+    return text;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof ParseText))
       return false;
-    ParseText other = (ParseText)o;
+    ParseText other = (ParseText) o;
     return this.text.equals(other.text);
   }
 
@@ -90,12 +94,11 @@
     }
     Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    
-    GenericOptionsParser parser =
-      new GenericOptionsParser(conf, opts, argv);
-    
+
+    GenericOptionsParser parser = new GenericOptionsParser(conf, opts, argv);
+
     String[] remainingArgs = parser.getRemainingArgs();
-    
+
     FileSystem fs = FileSystem.get(conf);
     try {
       int recno = Integer.parseInt(remainingArgs[0]);
Index: src/java/org/apache/nutch/parse/OutlinkExtractor.java
===================================================================
--- src/java/org/apache/nutch/parse/OutlinkExtractor.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/OutlinkExtractor.java	(working copy)
@@ -34,8 +34,8 @@
 import org.apache.oro.text.regex.Perl5Matcher;
 
 /**
- * Extractor to extract {@link org.apache.nutch.parse.Outlink}s 
- * / URLs from plain text using Regular Expressions.
+ * Extractor to extract {@link org.apache.nutch.parse.Outlink}s / URLs from
+ * plain text using Regular Expressions.
  * 
  * @see <a
  *      href="http://wiki.java.net/bin/view/Javapedia/RegularExpressions">Comparison
@@ -48,24 +48,27 @@
  * @since 0.7
  */
 public class OutlinkExtractor {
-  private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(OutlinkExtractor.class);
 
   /**
    * Regex pattern to get URLs within a plain text.
    * 
    * @see <a
    *      href="http://www.truerwords.net/articles/ut/urlactivation.html">http://www.truerwords.net/articles/ut/urlactivation.html
+
    *      </a>
    */
-  private static final String URL_PATTERN = 
-    "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
+  private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";
 
   /**
-   * Extracts <code>Outlink</code> from given plain text.
-   * Applying this method to non-plain-text can result in extremely lengthy
-   * runtimes for parasitic cases (postscript is a known example).
-   * @param plainText  the plain text from wich URLs should be extracted.
+   * Extracts <code>Outlink</code> from given plain text. Applying this method
+   * to non-plain-text can result in extremely lengthy runtimes for parasitic
+   * cases (postscript is a known example).
    * 
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
   public static Outlink[] getOutlinks(final String plainText, Configuration conf) {
@@ -73,15 +76,18 @@
   }
 
   /**
-   * Extracts <code>Outlink</code> from given plain text and adds anchor
-   * to the extracted <code>Outlink</code>s
+   * Extracts <code>Outlink</code> from given plain text and adds anchor to the
+   * extracted <code>Outlink</code>s
    * 
-   * @param plainText the plain text from wich URLs should be extracted.
-   * @param anchor    the anchor of the url
+   * @param plainText
+   *          the plain text from which URLs should be extracted.
+   * @param anchor
+   *          the anchor of the url
    * 
    * @return Array of <code>Outlink</code>s within found in plainText
    */
-  public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
+  public static Outlink[] getOutlinks(final String plainText, String anchor,
+      Configuration conf) {
     long start = System.currentTimeMillis();
     final List<Outlink> outlinks = new ArrayList<Outlink>();
 
@@ -97,11 +103,11 @@
       MatchResult result;
       String url;
 
-      //loop the matches
+      // loop the matches
       while (matcher.contains(input, pattern)) {
         // if this is taking too long, stop matching
-        //   (SHOULD really check cpu time used so that heavily loaded systems
-        //   do not unnecessarily hit this limit.)
+        // (SHOULD really check cpu time used so that heavily loaded systems
+        // do not unnecessarily hit this limit.)
         if (System.currentTimeMillis() - start >= 60000L) {
           if (LOG.isWarnEnabled()) {
             LOG.warn("Time limit exceeded for getOutLinks");
@@ -117,13 +123,16 @@
         }
       }
     } catch (Exception ex) {
-      // if the matcher fails (perhaps a malformed URL) we just log it and move on
-      if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); }
+      // if the matcher fails (perhaps a malformed URL) we just log it and move
+      // on
+      if (LOG.isErrorEnabled()) {
+        LOG.error("getOutlinks", ex);
+      }
     }
 
     final Outlink[] retval;
 
-    //create array of the Outlinks
+    // create array of the Outlinks
     if (outlinks != null && outlinks.size() > 0) {
       retval = outlinks.toArray(new Outlink[0]);
     } else {
@@ -132,7 +141,6 @@
 
     return retval;
   }
-  
 
   /**
    * Extracts outlinks from a plain text. <br />
@@ -161,7 +169,7 @@
     // url = re.getParen(0);
     //
     // if (LOG.isTraceEnabled()) {
-    //   LOG.trace("Extracted url: " + url);
+    // LOG.trace("Extracted url: " + url);
     // }
     //
     // try {
@@ -191,9 +199,8 @@
   }
 
   /**
-   * Extracts outlinks from a plain text.
-   * </p>
-   * This Method takes the JDK5 Regexp API.
+   * Extracts outlinks from a plain text. This method uses the JDK5 Regexp
+   * API.
    * 
    * @param plainText
    * 
@@ -241,5 +248,5 @@
     //
     // return retval;
   }
- 
+
 }
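
A small sketch of calling the extractor whose Javadoc is reformatted above; the sample text and anchor string are invented:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.parse.Outlink;
    import org.apache.nutch.parse.OutlinkExtractor;
    import org.apache.nutch.util.NutchConfiguration;

    public class OutlinkExtractorExample {
      public static void main(String[] args) {
        Configuration conf = NutchConfiguration.create();
        String text = "See http://nutch.apache.org/ and http://example.com/docs for details.";
        // Extraction is regex-based and bails out after roughly 60 seconds on
        // pathological (non-plain-text) input.
        Outlink[] links = OutlinkExtractor.getOutlinks(text, "example anchor", conf);
        for (Outlink link : links) {
          System.out.println(link.getToUrl() + " [" + link.getAnchor() + "]");
        }
      }
    }
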
Index: src/java/org/apache/nutch/parse/ParserFactory.java
===================================================================
--- src/java/org/apache/nutch/parse/ParserFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParserFactory.java	(working copy)
@@ -39,18 +39,17 @@
 import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.ObjectCache;
 
+/** Creates and caches {@link Parser} plugins. */
+public final class ParserFactory {
 
-/** Creates and caches {@link Parser} plugins.*/
-public final class ParserFactory {
-  
   public static final Logger LOG = LoggerFactory.getLogger(ParserFactory.class);
-  
+
   /** Wildcard for default plugins. */
   public static final String DEFAULT_PLUGIN = "*";
-  
+
   /** Empty extension list for caching purposes. */
   private final List EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
-  
+
   private Configuration conf;
   private ExtensionPoint extensionPoint;
   private ParsePluginList parsePluginList;
@@ -60,10 +59,12 @@
     ObjectCache objectCache = ObjectCache.get(conf);
     this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
         Parser.X_POINT_ID);
-    this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
+    this.parsePluginList = (ParsePluginList) objectCache
+        .getObject(ParsePluginList.class.getName());
     if (this.parsePluginList == null) {
       this.parsePluginList = new ParsePluginsReader().parse(conf);
-      objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
+      objectCache.setObject(ParsePluginList.class.getName(),
+          this.parsePluginList);
     }
 
     if (this.extensionPoint == null) {
@@ -73,45 +74,46 @@
       throw new RuntimeException(
           "Parse Plugins preferences could not be loaded.");
     }
-  }                      
-  
-   
+  }
+
   /**
    * Function returns an array of {@link Parser}s for a given content type.
-   *
+   * 
    * The function consults the internal list of parse plugins for the
-   * ParserFactory to determine the list of pluginIds, then gets the
-   * appropriate extension points to instantiate as {@link Parser}s.
-   *
-   * @param contentType The contentType to return the <code>Array</code>
-   *                    of {@link Parser}s for.
-   * @param url The url for the content that may allow us to get the type from
-   *            the file suffix.
+   * ParserFactory to determine the list of pluginIds, then gets the appropriate
+   * extension points to instantiate as {@link Parser}s.
+   * 
+   * @param contentType
+   *          The contentType to return the <code>Array</code> of
+   *          {@link Parser}s for.
+   * @param url
+   *          The url for the content that may allow us to get the type from the
+   *          file suffix.
    * @return An <code>Array</code> of {@link Parser}s for the given contentType.
    *         If there were plugins mapped to a contentType via the
-   *         <code>parse-plugins.xml</code> file, but never enabled via
-   *         the <code>plugin.includes</code> Nutch conf, then those plugins
-   *         won't be part of this array, i.e., they will be skipped.
-   *         So, if the ordered list of parsing plugins for
-   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         <code>parse-plugins.xml</code> file, but never enabled via the
+   *         <code>plugin.includes</code> Nutch conf, then those plugins won't
+   *         be part of this array, i.e., they will be skipped. So, if the
+   *         ordered list of parsing plugins for <code>text/plain</code> was
+   *         <code>[parse-text,parse-html,
    *         parse-rtf]</code>, and only <code>parse-html</code> and
    *         <code>parse-rtf</code> were enabled via
-   *         <code>plugin.includes</code>, then this ordered Array would
-   *         consist of two {@link Parser} interfaces,
+   *         <code>plugin.includes</code>, then this ordered Array would consist
+   *         of two {@link Parser} interfaces,
    *         <code>[parse-html, parse-rtf]</code>.
    */
   public Parser[] getParsers(String contentType, String url)
-  throws ParserNotFound {
-    
+      throws ParserNotFound {
+
     List<Parser> parsers = null;
     List<Extension> parserExts = null;
-    
+
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     // TODO once the MimeTypes is available
     // parsers = getExtensions(MimeUtils.map(contentType));
     // if (parsers != null) {
-    //   return parsers;
+    // return parsers;
     // }
     // Last Chance: Guess content-type from file url...
     // parsers = getExtensions(MimeUtils.getMimeType(url));
@@ -122,51 +124,52 @@
     }
 
     parsers = new Vector<Parser>(parserExts.size());
-    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
+    for (Iterator i = parserExts.iterator(); i.hasNext();) {
       Extension ext = (Extension) i.next();
       Parser p = null;
       try {
-        //check to see if we've cached this parser instance yet
+        // check to see if we've cached this parser instance yet
         p = (Parser) objectCache.getObject(ext.getId());
         if (p == null) {
           // go ahead and instantiate it and then cache it
           p = (Parser) ext.getExtensionInstance();
-          objectCache.setObject(ext.getId(),p);
+          objectCache.setObject(ext.getId(), p);
         }
         parsers.add(p);
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
           LOG.warn("ParserFactory:PluginRuntimeException when "
-                 + "initializing parser plugin "
-                 + ext.getDescriptor().getPluginId()
-                 + " instance in getParsers "
-                 + "function: attempting to continue instantiating parsers");
+              + "initializing parser plugin "
+              + ext.getDescriptor().getPluginId() + " instance in getParsers "
+              + "function: attempting to continue instantiating parsers");
         }
       }
     }
-    return parsers.toArray(new Parser[]{});
+    return parsers.toArray(new Parser[] {});
   }
-    
+
   /**
    * Function returns a {@link Parser} instance with the specified
-   * <code>extId</code>, representing its extension ID. If the Parser
-   * instance isn't found, then the function throws a
-   * <code>ParserNotFound</code> exception. If the function is able to find
-   * the {@link Parser} in the internal <code>PARSER_CACHE</code> then it
-   * will return the already instantiated Parser. Otherwise, if it has to
-   * instantiate the Parser itself , then this function will cache that Parser
-   * in the internal <code>PARSER_CACHE</code>.
+   * <code>extId</code>, representing its extension ID. If the Parser instance
+   * isn't found, then the function throws a <code>ParserNotFound</code>
+   * exception. If the function is able to find the {@link Parser} in the
+   * internal <code>PARSER_CACHE</code> then it will return the already
+   * instantiated Parser. Otherwise, if it has to instantiate the Parser itself,
+   * then this function will cache that Parser in the internal
+   * <code>PARSER_CACHE</code>.
    * 
-   * @param id The string extension ID (e.g.,
-   *        "org.apache.nutch.parse.rss.RSSParser",
-   *        "org.apache.nutch.parse.rtf.RTFParseFactory") of the {@link Parser}
-   *        implementation to return.
+   * @param id
+   *          The string extension ID (e.g.,
+   *          "org.apache.nutch.parse.rss.RSSParser",
+   *          "org.apache.nutch.parse.rtf.RTFParseFactory") of the
+   *          {@link Parser} implementation to return.
    * @return A {@link Parser} implementation specified by the parameter
    *         <code>id</code>.
-   * @throws ParserNotFound If the Parser is not found (i.e., registered with
-   *         the extension point), or if the there a
-   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
+   * @throws ParserNotFound
+   *           If the Parser is not found (i.e., not registered with the
+   *           extension point), or if there is a {@link PluginRuntimeException}
+   *           instantiating the {@link Parser}.
    */
   public Parser getParserById(String id) throws ParserNotFound {
 
@@ -174,7 +177,7 @@
     Extension parserExt = null;
 
     ObjectCache objectCache = ObjectCache.get(conf);
-    
+
     if (id != null) {
       parserExt = getExtension(extensions, id);
     }
@@ -185,12 +188,12 @@
     if (parserExt == null) {
       throw new ParserNotFound("No Parser Found for id [" + id + "]");
     }
-    
-    // first check the cache	    	   
+
+    // first check the cache
     if (objectCache.getObject(parserExt.getId()) != null) {
       return (Parser) objectCache.getObject(parserExt.getId());
 
-    // if not found in cache, instantiate the Parser    
+      // if not found in cache, instantiate the Parser
     } else {
       try {
         Parser p = (Parser) parserExt.getExtensionInstance();
@@ -198,30 +201,30 @@
         return p;
       } catch (PluginRuntimeException e) {
         if (LOG.isWarnEnabled()) {
-          LOG.warn("Canno initialize parser " +
-                   parserExt.getDescriptor().getPluginId() +
-                   " (cause: " + e.toString());
+          LOG.warn("Cannot initialize parser "
+              + parserExt.getDescriptor().getPluginId() + " (cause: "
+              + e.toString());
         }
         throw new ParserNotFound("Cannot init parser for id [" + id + "]");
       }
     }
   }
-  
+
   /**
    * Finds the best-suited parse plugin for a given contentType.
    * 
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return a list of extensions to be used for this contentType.
-   *         If none, returns <code>null</code>.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return a list of extensions to be used for this contentType. If none,
+   *         returns <code>null</code>.
    */
   protected List<Extension> getExtensions(String contentType) {
-    
+
     ObjectCache objectCache = ObjectCache.get(conf);
     // First of all, tries to clean the content-type
     String type = null;
     type = MimeUtil.cleanMimeType(contentType);
 
-
     List<Extension> extensions = (List<Extension>) objectCache.getObject(type);
 
     // Just compare the reference:
@@ -229,100 +232,105 @@
     if (extensions == EMPTY_EXTENSION_LIST) {
       return null;
     }
-    
+
     if (extensions == null) {
       extensions = findExtensions(type);
       if (extensions != null) {
         objectCache.setObject(type, extensions);
       } else {
-      	// Put the empty extension list into cache
-      	// to remember we don't know any related extension.
+        // Put the empty extension list into cache
+        // to remember we don't know any related extension.
         objectCache.setObject(type, EMPTY_EXTENSION_LIST);
       }
     }
     return extensions;
   }
-  
+
   /**
    * searches a list of suitable parse plugins for the given contentType.
-   * <p>It first looks for a preferred plugin defined in the parse-plugin
-   * file.  If none is found, it returns a list of default plugins.
+   * <p>
+   * It first looks for a preferred plugin defined in the parse-plugin file. If
+   * none is found, it returns a list of default plugins.
    * 
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> findExtensions(String contentType) {
-    
+
     Extension[] extensions = this.extensionPoint.getExtensions();
-    
+
     // Look for a preferred plugin.
-    List<String> parsePluginList =
-      this.parsePluginList.getPluginList(contentType);
-    List<Extension> extensionList =
-      matchExtensions(parsePluginList, extensions, contentType);
+    List<String> parsePluginList = this.parsePluginList
+        .getPluginList(contentType);
+    List<Extension> extensionList = matchExtensions(parsePluginList,
+        extensions, contentType);
     if (extensionList != null) {
       return extensionList;
     }
-    
+
     // If none found, look for a default plugin.
     parsePluginList = this.parsePluginList.getPluginList(DEFAULT_PLUGIN);
     return matchExtensions(parsePluginList, extensions, DEFAULT_PLUGIN);
   }
-  
+
   /**
    * Tries to find a suitable parser for the given contentType.
    * <ol>
-   * <li>It checks if a parser which accepts the contentType
-   * can be found in the <code>plugins</code> list;</li>
-   * <li>If this list is empty, it tries to find amongst the loaded
-   * extensions whether some of them might suit and warns the user.</li>
+   * <li>It checks if a parser which accepts the contentType can be found in the
+   * <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded extensions
+   * whether some of them might suit and warns the user.</li>
    * </ol>
-   * @param plugins List of candidate plugins.
-   * @param extensions Array of loaded extensions.
-   * @param contentType Content-Type for which we seek a parse plugin.
-   * @return List - List of extensions to be used for this contentType.
-   *                If none, returns null.
+   * 
+   * @param plugins
+   *          List of candidate plugins.
+   * @param extensions
+   *          Array of loaded extensions.
+   * @param contentType
+   *          Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType. If none,
+   *         returns null.
    */
   private List<Extension> matchExtensions(List<String> plugins,
-                               Extension[] extensions,
-                               String contentType) {
-    
+      Extension[] extensions, String contentType) {
+
     List<Extension> extList = new ArrayList<Extension>();
     if (plugins != null) {
-      
+
       for (String parsePluginId : plugins) {
-        
+
         Extension ext = getExtension(extensions, parsePluginId, contentType);
         // the extension returned may be null
         // that means that it was not enabled in the plugin.includes
         // nutch conf property, but it was mapped in the
         // parse-plugins.xml
-        // file. 
+        // file.
         // OR it was enabled in plugin.includes, but the plugin's plugin.xml
         // file does not claim that the plugin supports the specified mimeType
         // in either case, LOG the appropriate error message to WARN level
-        
+
         if (ext == null) {
-          //try to get it just by its pluginId
+          // try to get it just by its pluginId
           ext = getExtension(extensions, parsePluginId);
-          
-          if (LOG.isWarnEnabled()) { 
+
+          if (LOG.isWarnEnabled()) {
             if (ext != null) {
               // plugin was enabled via plugin.includes
               // its plugin.xml just doesn't claim to support that
               // particular mimeType
-              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but " + "its plugin.xml " +
-                       "file does not claim to support contentType: " +
-                       contentType);
+              LOG.warn("ParserFactory:Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but " + "its plugin.xml "
+                  + "file does not claim to support contentType: "
+                  + contentType);
             } else {
               // plugin wasn't enabled via plugin.includes
-              LOG.warn("ParserFactory: Plugin: " + parsePluginId + 
-                       " mapped to contentType " + contentType +
-                       " via parse-plugins.xml, but not enabled via " +
-                       "plugin.includes in nutch-default.xml");                     
+              LOG.warn("ParserFactory: Plugin: " + parsePluginId
+                  + " mapped to contentType " + contentType
+                  + " via parse-plugins.xml, but not enabled via "
+                  + "plugin.includes in nutch-default.xml");
             }
           }
         }
@@ -332,7 +340,7 @@
           extList.add(ext);
         }
       }
-      
+
     } else {
       // okay, there were no list of plugins defined for
       // this mimeType, however, there may be plugins registered
@@ -341,75 +349,78 @@
       // so, iterate through the list of extensions and if you find
       // any extensions where this is the case, throw a
       // NotMappedParserException
-      
-      for (int i=0; i<extensions.length; i++) {
-      	if ("*".equals(extensions[i].getAttribute("contentType"))){
+
+      for (int i = 0; i < extensions.length; i++) {
+        if ("*".equals(extensions[i].getAttribute("contentType"))) {
           extList.add(0, extensions[i]);
-        }
-        else if (extensions[i].getAttribute("contentType") != null
-            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
+        } else if (extensions[i].getAttribute("contentType") != null
+            && contentType.matches(escapeContentType(extensions[i]
+                .getAttribute("contentType")))) {
           extList.add(extensions[i]);
         }
       }
-      
+
       if (extList.size() > 0) {
         if (LOG.isInfoEnabled()) {
           StringBuffer extensionsIDs = new StringBuffer("[");
           boolean isFirst = true;
-          for (Extension ext : extList){
-        	  if (!isFirst) extensionsIDs.append(" - ");
-        	  else isFirst=false;
-        	  extensionsIDs.append(ext.getId());
+          for (Extension ext : extList) {
+            if (!isFirst)
+              extensionsIDs.append(" - ");
+            else
+              isFirst = false;
+            extensionsIDs.append(ext.getId());
           }
-    	  extensionsIDs.append("]");
-          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
-                   " are enabled via the plugin.includes system " +
-                   "property, and all claim to support the content type " +
-                   contentType + ", but they are not mapped to it  in the " +
-                   "parse-plugins.xml file");
+          extensionsIDs.append("]");
+          LOG.info("The parsing plugins: " + extensionsIDs.toString()
+              + " are enabled via the plugin.includes system "
+              + "property, and all claim to support the content type "
+              + contentType + ", but they are not mapped to it in the "
+              + "parse-plugins.xml file");
         }
       } else if (LOG.isDebugEnabled()) {
-        LOG.debug("ParserFactory:No parse plugins mapped or enabled for " +
-                  "contentType " + contentType);
+        LOG.debug("ParserFactory:No parse plugins mapped or enabled for "
+            + "contentType " + contentType);
       }
     }
-    
+
     return (extList.size() > 0) ? extList : null;
   }
-  
+
   private String escapeContentType(String contentType) {
-  	// Escapes contentType in order to use as a regex 
-  	// (and keep backwards compatibility).
-  	// This enables to accept multiple types for a single parser. 
-  	return contentType.replace("+", "\\+").replace(".", "\\.");
-	}
+    // Escapes contentType in order to use as a regex
+    // (and keep backwards compatibility).
+    // This enables to accept multiple types for a single parser.
+    return contentType.replace("+", "\\+").replace(".", "\\.");
+  }
 
   private boolean match(Extension extension, String id, String type) {
-    return ((id.equals(extension.getId())) &&
-            (extension.getAttribute("contentType").equals("*") || 
-             type.matches(escapeContentType(extension.getAttribute("contentType"))) ||
-             type.equals(DEFAULT_PLUGIN)));
+    return ((id.equals(extension.getId())) && (extension.getAttribute(
+        "contentType").equals("*")
+        || type
+            .matches(escapeContentType(extension.getAttribute("contentType"))) || type
+          .equals(DEFAULT_PLUGIN)));
   }
-  
+
   /** Get an extension from its id and supported content-type. */
   private Extension getExtension(Extension[] list, String id, String type) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (match(list[i], id, type)) {
         return list[i];
       }
     }
     return null;
   }
-    
+
   private Extension getExtension(Extension[] list, String id) {
-    for (int i=0; i<list.length; i++) {
+    for (int i = 0; i < list.length; i++) {
       if (id.equals(list[i].getId())) {
         return list[i];
       }
     }
     return null;
   }
-  
+
   private Extension getExtensionFromAlias(Extension[] list, String id) {
     return getExtension(list, parsePluginList.getAliases().get(id));
   }
Index: src/java/org/apache/nutch/parse/Outlink.java
===================================================================
--- src/java/org/apache/nutch/parse/Outlink.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/Outlink.java	(working copy)
@@ -28,11 +28,13 @@
   private String toUrl;
   private String anchor;
 
-  public Outlink() {}
+  public Outlink() {
+  }
 
   public Outlink(String toUrl, String anchor) throws MalformedURLException {
     this.toUrl = toUrl;
-    if (anchor == null) anchor = "";
+    if (anchor == null)
+      anchor = "";
     this.anchor = anchor;
   }
 
@@ -43,8 +45,8 @@
 
   /** Skips over one Outlink in the input. */
   public static void skip(DataInput in) throws IOException {
-    Text.skip(in);                                // skip toUrl
-    Text.skip(in);                                // skip anchor
+    Text.skip(in); // skip toUrl
+    Text.skip(in); // skip anchor
   }
 
   public void write(DataOutput out) throws IOException {
@@ -58,21 +60,25 @@
     return outlink;
   }
 
-  public String getToUrl() { return toUrl; }
-  public String getAnchor() { return anchor; }
+  public String getToUrl() {
+    return toUrl;
+  }
 
+  public String getAnchor() {
+    return anchor;
+  }
 
   public boolean equals(Object o) {
     if (!(o instanceof Outlink))
       return false;
-    Outlink other = (Outlink)o;
-    return
-      this.toUrl.equals(other.toUrl) &&
-      this.anchor.equals(other.anchor);
+    Outlink other = (Outlink) o;
+    return this.toUrl.equals(other.toUrl) && this.anchor.equals(other.anchor);
   }
 
   public String toString() {
-    return "toUrl: " + toUrl + " anchor: " + anchor;  // removed "\n". toString, not printLine... WD.
+    return "toUrl: " + toUrl + " anchor: " + anchor; // removed "\n".
+    // toString, not
+    // printLine... WD.
   }
 
 }
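
A brief sketch of the Outlink Writable round-trip, using the read/write methods shown in context above; the URL and anchor are invented:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.nutch.parse.Outlink;

    public class OutlinkExample {
      public static void main(String[] args) throws IOException {
        Outlink out = new Outlink("http://example.com/page", "Example page");

        // Round-trip through the Writable serialization used in segment data.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        out.write(new DataOutputStream(bytes));
        Outlink back = Outlink.read(
            new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(back); // toUrl: http://example.com/page anchor: Example page
      }
    }
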
Index: src/java/org/apache/nutch/parse/ParsePluginList.java
===================================================================
--- src/java/org/apache/nutch/parse/ParsePluginList.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParsePluginList.java	(working copy)
@@ -22,25 +22,23 @@
 import java.util.List;
 import java.util.Map;
 
-
 /**
  * This class represents a natural ordering for which parsing plugin should get
  * called for a particular mimeType. It provides methods to store the
  * parse-plugins.xml data, and methods to retreive the name of the appropriate
  * parsing plugin for a contentType.
- *
+ * 
  * @author mattmann
  * @version 1.0
  */
 class ParsePluginList {
-  
+
   /* a map to link mimeType to an ordered list of parsing plugins */
   private Map<String, List<String>> fMimeTypeToPluginMap = null;
-  
+
   /* A list of aliases */
   private Map<String, String> aliases = null;
-  
-  
+
   /**
    * Constructs a new ParsePluginList
    */
@@ -48,7 +46,7 @@
     fMimeTypeToPluginMap = new HashMap<String, List<String>>();
     aliases = new HashMap<String, String>();
   }
-  
+
   List<String> getPluginList(String mimeType) {
     return fMimeTypeToPluginMap.get(mimeType);
   }
@@ -56,18 +54,18 @@
   void setAliases(Map<String, String> aliases) {
     this.aliases = aliases;
   }
-  
+
   Map<String, String> getAliases() {
     return aliases;
   }
-  
+
   void setPluginList(String mimeType, List<String> l) {
     fMimeTypeToPluginMap.put(mimeType, l);
   }
-  
+
   List<String> getSupportedMimeTypes() {
-    return Arrays.asList(fMimeTypeToPluginMap.keySet().toArray(
-            new String[] {}));
+    return Arrays
+        .asList(fMimeTypeToPluginMap.keySet().toArray(new String[] {}));
   }
-  
+
 }
Index: src/java/org/apache/nutch/parse/HtmlParseFilters.java
===================================================================
--- src/java/org/apache/nutch/parse/HtmlParseFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/HtmlParseFilters.java	(working copy)
@@ -27,76 +27,81 @@
 
 import org.w3c.dom.DocumentFragment;
 
-/** Creates and caches {@link HtmlParseFilter} implementing plugins.*/
+/** Creates and caches {@link HtmlParseFilter} implementing plugins. */
 public class HtmlParseFilters {
 
   private HtmlParseFilter[] htmlParseFilters;
-  
+
   public static final String HTMLPARSEFILTER_ORDER = "htmlparsefilter.order";
 
   public HtmlParseFilters(Configuration conf) {
-        String order = conf.get(HTMLPARSEFILTER_ORDER);
-        ObjectCache objectCache = ObjectCache.get(conf);
-        this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
-        if (htmlParseFilters == null) {
-          /*
-           * If ordered filters are required, prepare array of filters based on
-           * property
-           */
-          String[] orderedFilters = null;
-          if (order != null && !order.trim().equals("")) {
-            orderedFilters = order.split("\\s+");
+    String order = conf.get(HTMLPARSEFILTER_ORDER);
+    ObjectCache objectCache = ObjectCache.get(conf);
+    this.htmlParseFilters = (HtmlParseFilter[]) objectCache
+        .getObject(HtmlParseFilter.class.getName());
+    if (htmlParseFilters == null) {
+      /*
+       * If ordered filters are required, prepare array of filters based on
+       * property
+       */
+      String[] orderedFilters = null;
+      if (order != null && !order.trim().equals("")) {
+        orderedFilters = order.split("\\s+");
+      }
+      HashMap<String, HtmlParseFilter> filterMap = new HashMap<String, HtmlParseFilter>();
+      try {
+        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(
+            HtmlParseFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
+        Extension[] extensions = point.getExtensions();
+        for (int i = 0; i < extensions.length; i++) {
+          Extension extension = extensions[i];
+          HtmlParseFilter parseFilter = (HtmlParseFilter) extension
+              .getExtensionInstance();
+          if (!filterMap.containsKey(parseFilter.getClass().getName())) {
+            filterMap.put(parseFilter.getClass().getName(), parseFilter);
           }
-            HashMap<String, HtmlParseFilter> filterMap =
-              new HashMap<String, HtmlParseFilter>();
-            try {
-                ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(HtmlParseFilter.X_POINT_ID);
-                if (point == null)
-                    throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
-                Extension[] extensions = point.getExtensions();
-                for (int i = 0; i < extensions.length; i++) {
-                    Extension extension = extensions[i];
-                    HtmlParseFilter parseFilter = (HtmlParseFilter) extension.getExtensionInstance();
-                    if (!filterMap.containsKey(parseFilter.getClass().getName())) {
-                        filterMap.put(parseFilter.getClass().getName(), parseFilter);
-                    }
-                }
-                HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(new HtmlParseFilter[filterMap.size()]);
-                /*
-                 * If no ordered filters required, just get the filters in an
-                 * indeterminate order
-                 */
-                if (orderedFilters == null) {
-                  objectCache.setObject(HtmlParseFilter.class.getName(), htmlParseFilters);
-                }
-                /* Otherwise run the filters in the required order */
-                else {
-                  ArrayList<HtmlParseFilter> filters = new ArrayList<HtmlParseFilter>();
-                  for (int i = 0; i < orderedFilters.length; i++) {
-                    HtmlParseFilter filter = filterMap
-                        .get(orderedFilters[i]);
-                    if (filter != null) {
-                      filters.add(filter);
-                    }
-                  }
-                  objectCache.setObject(HtmlParseFilter.class.getName(), filters
-                      .toArray(new HtmlParseFilter[filters.size()]));
-                }
-            } catch (PluginRuntimeException e) {
-                throw new RuntimeException(e);
+        }
+        HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(
+            new HtmlParseFilter[filterMap.size()]);
+        /*
+         * If no ordered filters required, just get the filters in an
+         * indeterminate order
+         */
+        if (orderedFilters == null) {
+          objectCache.setObject(HtmlParseFilter.class.getName(),
+              htmlParseFilters);
+        }
+        /* Otherwise run the filters in the required order */
+        else {
+          ArrayList<HtmlParseFilter> filters = new ArrayList<HtmlParseFilter>();
+          for (int i = 0; i < orderedFilters.length; i++) {
+            HtmlParseFilter filter = filterMap.get(orderedFilters[i]);
+            if (filter != null) {
+              filters.add(filter);
             }
-            this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
+          }
+          objectCache.setObject(HtmlParseFilter.class.getName(),
+              filters.toArray(new HtmlParseFilter[filters.size()]));
         }
-    }                  
+      } catch (PluginRuntimeException e) {
+        throw new RuntimeException(e);
+      }
+      this.htmlParseFilters = (HtmlParseFilter[]) objectCache
+          .getObject(HtmlParseFilter.class.getName());
+    }
+  }
 
   /** Run all defined filters. */
-  public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc) {
+  public ParseResult filter(Content content, ParseResult parseResult,
+      HTMLMetaTags metaTags, DocumentFragment doc) {
 
     // loop on each filter
-    for (int i = 0 ; i < this.htmlParseFilters.length; i++) {
+    for (int i = 0; i < this.htmlParseFilters.length; i++) {
       // call filter interface
-      parseResult =
-        htmlParseFilters[i].filter(content, parseResult, metaTags, doc);
+      parseResult = htmlParseFilters[i].filter(content, parseResult, metaTags,
+          doc);
 
       // any failure on parse obj, return
       if (!parseResult.isSuccess()) {
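
A sketch of constructing the filter chain configured above; the class name passed to htmlparsefilter.order is a placeholder, not a real plugin:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.parse.HtmlParseFilters;
    import org.apache.nutch.util.NutchConfiguration;

    public class HtmlParseFiltersExample {
      public static void main(String[] args) {
        Configuration conf = NutchConfiguration.create();
        // Optionally force the order in which HtmlParseFilter extensions run;
        // the class name below is a placeholder.
        conf.set(HtmlParseFilters.HTMLPARSEFILTER_ORDER,
            "org.example.MyHtmlParseFilter");
        HtmlParseFilters filters = new HtmlParseFilters(conf);
        // filters.filter(content, parseResult, metaTags, doc) then runs each
        // configured filter in order and returns early on the first
        // unsuccessful ParseResult.
        System.out.println("filters initialized: " + filters);
      }
    }
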
Index: src/java/org/apache/nutch/parse/HTMLMetaTags.java
===================================================================
--- src/java/org/apache/nutch/parse/HTMLMetaTags.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/HTMLMetaTags.java	(working copy)
@@ -22,8 +22,8 @@
 import java.util.Properties;
 
 /**
- * This class holds the information about HTML "meta" tags extracted from 
- * a page. Some special tags have convenience methods for easy checking.
+ * This class holds the information about HTML "meta" tags extracted from a
+ * page. Some special tags have convenience methods for easy checking.
  */
 public class HTMLMetaTags {
   private boolean noIndex = false;
@@ -154,8 +154,8 @@
   }
 
   /**
-   * A convenience method. Returns the current value of <code>refreshTime</code>.
-   * The value may be invalid if {@link #getRefresh()}returns
+   * A convenience method. Returns the current value of <code>refreshTime</code>
+   * . The value may be invalid if {@link #getRefresh()}returns
    * <code>false</code>.
    */
   public int getRefreshTime() {
@@ -177,26 +177,22 @@
   public Properties getHttpEquivTags() {
     return httpEquivTags;
   }
-  
+
   public String toString() {
     StringBuffer sb = new StringBuffer();
-    sb.append("base=" + baseHref
-            + ", noCache=" + noCache
-            + ", noFollow=" + noFollow
-            + ", noIndex=" + noIndex
-            + ", refresh=" + refresh
-            + ", refreshHref=" + refreshHref + "\n"
-            );
+    sb.append("base=" + baseHref + ", noCache=" + noCache + ", noFollow="
+        + noFollow + ", noIndex=" + noIndex + ", refresh=" + refresh
+        + ", refreshHref=" + refreshHref + "\n");
     sb.append(" * general tags:\n");
     Iterator it = generalTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + generalTags.get(key) + "\n");
     }
     sb.append(" * http-equiv tags:\n");
     it = httpEquivTags.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = (String) it.next();
       sb.append("   - " + key + "\t=\t" + httpEquivTags.get(key) + "\n");
     }
     return sb.toString();
Index: src/java/org/apache/nutch/parse/ParseCallable.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseCallable.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseCallable.java	(working copy)
@@ -24,7 +24,7 @@
 class ParseCallable implements Callable<ParseResult> {
   private Parser p;
   private Content content;
-  
+
   public ParseCallable(Parser p, Content content) {
     this.p = p;
     this.content = content;
@@ -33,5 +33,5 @@
   @Override
   public ParseResult call() throws Exception {
     return p.getParse(content);
-  }    
+  }
 }
\ No newline at end of file
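
ParseCallable just wraps Parser.getParse in a Callable; below is a sketch of the timeout pattern such a wrapper enables (the helper class and its signature are hypothetical, not Nutch API):

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;

    import org.apache.nutch.parse.ParseResult;
    import org.apache.nutch.parse.Parser;
    import org.apache.nutch.protocol.Content;

    public class ParseWithTimeout {
      // Runs a parser with a time limit; 'parser' and 'content' are assumed to
      // come from ParserFactory and the fetcher respectively.
      public static ParseResult parse(final Parser parser, final Content content,
          long timeoutSeconds) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
          Future<ParseResult> task = executor.submit(new Callable<ParseResult>() {
            public ParseResult call() throws Exception {
              return parser.getParse(content);
            }
          });
          return task.get(timeoutSeconds, TimeUnit.SECONDS); // TimeoutException on overrun
        } finally {
          executor.shutdownNow();
        }
      }
    }
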
Index: src/java/org/apache/nutch/parse/Parser.java
===================================================================
--- src/java/org/apache/nutch/parse/Parser.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/Parser.java	(working copy)
@@ -24,33 +24,35 @@
 import org.apache.nutch.plugin.Pluggable;
 import org.apache.nutch.protocol.Content;
 
-/** A parser for content generated by a {@link org.apache.nutch.protocol.Protocol}
- * implementation.  This interface is implemented by extensions.  Nutch's core
- * contains no page parsing code.
+/**
+ * A parser for content generated by a
+ * {@link org.apache.nutch.protocol.Protocol} implementation. This interface is
+ * implemented by extensions. Nutch's core contains no page parsing code.
  */
 public interface Parser extends Pluggable, Configurable {
   /** The name of the extension point. */
   public final static String X_POINT_ID = Parser.class.getName();
 
-  /** 
+  /**
    * <p>
-   * This method parses the given content and returns a map of
-   * &lt;key, parse&gt; pairs. {@link Parse} instances will be persisted 
-   * under the given key.
+   * This method parses the given content and returns a map of &lt;key,
+   * parse&gt; pairs. {@link Parse} instances will be persisted under the given
+   * key.
    * </p>
    * <p>
-   * Note: Meta-redirects should be followed only when they are coming from
-   * the original URL. That is: <br> 
+   * Note: Meta-redirects should be followed only when they are coming from the
+   * original URL. That is: <br>
    * Assume fetcher is in parsing mode and is currently processing
-   * foo.bar.com/redirect.html. If this url contains a meta redirect
-   * to another url, fetcher should only follow the redirect if the map
-   * contains an entry of the form &lt;"foo.bar.com/redirect.html", 
-   * {@link Parse} with a {@link ParseStatus} indicating the redirect&gt;.
+   * foo.bar.com/redirect.html. If this url contains a meta redirect to another
+   * url, fetcher should only follow the redirect if the map contains an entry
+   * of the form &lt;"foo.bar.com/redirect.html", {@link Parse} with a
+   * {@link ParseStatus} indicating the redirect&gt;.
    * </p>
    * 
-   * @param c Content to be parsed
+   * @param c
+   *          Content to be parsed
    * @return a map containing &lt;key, parse&gt; pairs
    * @since NUTCH-443
    */
-   ParseResult getParse(Content c);
+  ParseResult getParse(Content c);
 }
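
For context, a minimal sketch of a Parser extension honoring the contract above; it is illustrative only, and a real plugin would also be declared in its plugin.xml and enabled via plugin.includes:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.metadata.Metadata;
    import org.apache.nutch.parse.Outlink;
    import org.apache.nutch.parse.ParseData;
    import org.apache.nutch.parse.ParseImpl;
    import org.apache.nutch.parse.ParseResult;
    import org.apache.nutch.parse.ParseStatus;
    import org.apache.nutch.parse.Parser;
    import org.apache.nutch.protocol.Content;

    public class TrivialTextParser implements Parser {
      private Configuration conf;

      public ParseResult getParse(Content content) {
        // Naive byte-to-string conversion, good enough for a sketch.
        String text = new String(content.getContent());
        ParseData data = new ParseData(ParseStatus.STATUS_SUCCESS, "",
            new Outlink[0], new Metadata(), new Metadata());
        // Persist the Parse under the content's own URL, as required above.
        return ParseResult.createParseResult(content.getUrl(),
            new ParseImpl(text, data));
      }

      public void setConf(Configuration conf) {
        this.conf = conf;
      }

      public Configuration getConf() {
        return conf;
      }
    }
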
Index: src/java/org/apache/nutch/parse/ParseImpl.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseImpl.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseImpl.java	(working copy)
@@ -20,8 +20,9 @@
 import java.io.*;
 import org.apache.hadoop.io.*;
 
-
-/** The result of parsing a page's raw content.
+/**
+ * The result of parsing a page's raw content.
+ * 
  * @see Parser#getParse(Content)
  */
 public class ParseImpl implements Parse, Writable {
@@ -29,7 +30,8 @@
   private ParseData data;
   private boolean isCanonical;
 
-  public ParseImpl() {}
+  public ParseImpl() {
+  }
 
   public ParseImpl(Parse parse) {
     this(new ParseText(parse.getText()), parse.getData(), true);
@@ -38,7 +40,7 @@
   public ParseImpl(String text, ParseData data) {
     this(new ParseText(text), data, true);
   }
-  
+
   public ParseImpl(ParseText text, ParseData data) {
     this(text, data, true);
   }
@@ -49,12 +51,18 @@
     this.isCanonical = isCanonical;
   }
 
-  public String getText() { return text.getText(); }
+  public String getText() {
+    return text.getText();
+  }
 
-  public ParseData getData() { return data; }
+  public ParseData getData() {
+    return data;
+  }
 
-  public boolean isCanonical() { return isCanonical; }
-  
+  public boolean isCanonical() {
+    return isCanonical;
+  }
+
   public final void write(DataOutput out) throws IOException {
     out.writeBoolean(isCanonical);
     text.write(out);
Index: src/java/org/apache/nutch/parse/ParseSegment.java
===================================================================
--- src/java/org/apache/nutch/parse/ParseSegment.java	(revision 1188252)
+++ src/java/org/apache/nutch/parse/ParseSegment.java	(working copy)
@@ -44,13 +44,13 @@
     Reducer<Text, Writable, Text, Writable> {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParseSegment.class);
-  
+
   private ScoringFilters scfilters;
-  
+
   public ParseSegment() {
     this(null);
   }
-  
+
   public ParseSegment(Configuration conf) {
     super(conf);
   }
@@ -60,21 +60,22 @@
     this.scfilters = new ScoringFilters(job);
   }
 
-  public void close() {}
-  
+  public void close() {
+  }
+
   private Text newKey = new Text();
 
   public void map(WritableComparable key, Content content,
-                  OutputCollector<Text, ParseImpl> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, ParseImpl> output, Reporter reporter)
+      throws IOException {
     // convert on the fly from old UTF8 keys
     if (key instanceof UTF8) {
       newKey.set(key.toString());
       key = newKey;
     }
-    
-    int status =
-      Integer.parseInt(content.getMetadata().get(Nutch.FETCH_STATUS_KEY));
+
+    int status = Integer.parseInt(content.getMetadata().get(
+        Nutch.FETCH_STATUS_KEY));
     if (status != CrawlDatum.STATUS_FETCH_SUCCESS) {
       // content not fetched successfully, skip document
       LOG.debug("Skipping " + key + " as content is not fetched successfully");
@@ -85,7 +86,8 @@
     try {
       parseResult = new ParseUtil(getConf()).parse(content);
     } catch (Exception e) {
-      LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException(e));
+      LOG.warn("Error parsing: " + key + ": "
+          + StringUtils.stringifyException(e));
       return;
     }
 
@@ -95,7 +97,8 @@
       ParseStatus parseStatus = parse.getData().getStatus();
 
       LOG.info("Parsing: " + url);
-      reporter.incrCounter("ParserStatus", ParseStatus.majorCodes[parseStatus.getMajorCode()], 1);
+      reporter.incrCounter("ParserStatus",
+          ParseStatus.majorCodes[parseStatus.getMajorCode()], 1);
 
       if (!parseStatus.isSuccess()) {
         LOG.warn("Error parsing: " + key + ": " + parseStatus);
@@ -103,32 +106,34 @@
       }
 
       // pass segment name to parse data
-      parse.getData().getContentMeta().set(Nutch.SEGMENT_NAME_KEY, 
-                                           getConf().get(Nutch.SEGMENT_NAME_KEY));
+      parse.getData().getContentMeta()
+          .set(Nutch.SEGMENT_NAME_KEY, getConf().get(Nutch.SEGMENT_NAME_KEY));
 
       // compute the new signature
-      byte[] signature = 
-        SignatureFactory.getSignature(getConf()).calculate(content, parse); 
-      parse.getData().getContentMeta().set(Nutch.SIGNATURE_KEY, 
-          StringUtil.toHexString(signature));
-      
+      byte[] signature = SignatureFactory.getSignature(getConf()).calculate(
+          content, parse);
+      parse.getData().getContentMeta()
+          .set(Nutch.SIGNATURE_KEY, StringUtil.toHexString(signature));
+
       try {
         scfilters.passScoreAfterParsing(url, content, parse);
       } catch (ScoringFilterException e) {
         if (LOG.isWarnEnabled()) {
           e.printStackTrace(LogUtil.getWarnStream(LOG));
-          LOG.warn("Error passing score: "+ url +": "+e.getMessage());
+          LOG.warn("Error passing score: " + url + ": " + e.getMessage());
         }
       }
-      output.collect(url, new ParseImpl(new ParseText(parse.getText()), 
-                                        parse.getData(), parse.isCanonical()));
+      output.collect(
+          url,
+          new ParseImpl(new ParseText(parse.getText()), parse.getData(), parse
+              .isCanonical()));
     }
   }
 
   public void reduce(Text key, Iterator<Writable> values,
-                     OutputCollector<Text, Writable> output, Reporter reporter)
-    throws IOException {
-    output.collect(key, (Writable)values.next()); // collect first value
+      OutputCollector<Text, Writable> output, Reporter reporter)
+      throws IOException {
+    output.collect(key, (Writable) values.next()); // collect first value
   }
 
   public void parse(Path segment) throws IOException {
@@ -148,7 +153,7 @@
     job.setInputFormat(SequenceFileInputFormat.class);
     job.setMapperClass(ParseSegment.class);
     job.setReducerClass(ParseSegment.class);
-    
+
     FileOutputFormat.setOutputPath(job, segment);
     job.setOutputFormat(ParseOutputFormat.class);
     job.setOutputKeyClass(Text.class);
@@ -156,15 +161,16 @@
 
     JobClient.runJob(job);
     long end = System.currentTimeMillis();
-    LOG.info("ParseSegment: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ParseSegment: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-
   public static void main(String[] args) throws Exception {
-	int res = ToolRunner.run(NutchConfiguration.create(), new ParseSegment(), args);
-	System.exit(res);
+    int res = ToolRunner.run(NutchConfiguration.create(), new ParseSegment(),
+        args);
+    System.exit(res);
   }
-	  
+
   public int run(String[] args) throws Exception {
     Path segment;
 
@@ -173,7 +179,7 @@
     if (args.length == 0) {
       System.err.println(usage);
       System.exit(-1);
-    }      
+    }
     segment = new Path(args[0]);
     parse(segment);
     return 0;
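
A sketch of launching the parse job above programmatically; the segment path is a placeholder:

    import org.apache.hadoop.util.ToolRunner;
    import org.apache.nutch.parse.ParseSegment;
    import org.apache.nutch.util.NutchConfiguration;

    public class RunParseSegment {
      public static void main(String[] args) throws Exception {
        // Equivalent to "bin/nutch parse crawl/segments/<segment>": parses the
        // fetched content of one segment directory.
        int res = ToolRunner.run(NutchConfiguration.create(), new ParseSegment(),
            new String[] { "crawl/segments/20111031123456" });
        System.exit(res);
      }
    }
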
Index: src/java/org/apache/nutch/util/SuffixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/SuffixStringMatcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/SuffixStringMatcher.java	(working copy)
@@ -21,8 +21,8 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of suffixes.  Zero-length <code>Strings</code> are ignored.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * suffixes. Zero-length <code>Strings</code> are ignored.
  */
 public class SuffixStringMatcher extends TrieStringMatcher {
 
@@ -32,7 +32,7 @@
    */
   public SuffixStringMatcher(String[] suffixes) {
     super();
-    for (int i= 0; i < suffixes.length; i++)
+    for (int i = 0; i < suffixes.length; i++)
       addPatternBackward(suffixes[i]);
   }
 
@@ -43,20 +43,20 @@
    */
   public SuffixStringMatcher(Collection suffixes) {
     super();
-    Iterator iter= suffixes.iterator();
+    Iterator iter = suffixes.iterator();
     while (iter.hasNext())
-      addPatternBackward((String)iter.next());
+      addPatternBackward((String) iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * suffix in the trie
+   * Returns true if the given <code>String</code> is matched by a suffix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -64,16 +64,15 @@
     return false;
   }
 
-
   /**
    * Returns the shortest suffix of <code>input<code> that is matched,
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
         return input.substring(i);
@@ -86,29 +85,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= input.length() - 1; i >= 0; i--) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = input.length() - 1; i >= 0; i--) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(i);
+        result = input.substring(i);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    SuffixStringMatcher matcher= 
-      new SuffixStringMatcher( 
-        new String[] 
-        {"a", "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar"} );
+    SuffixStringMatcher matcher = new SuffixStringMatcher(new String[] { "a",
+        "abcd", "bcd", "bcdefg", "defg", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
-                    "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                    "kite", };
+    String[] tests = { "a", "ac", "abcd", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/URLUtil.java
===================================================================
--- src/java/org/apache/nutch/util/URLUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/URLUtil.java	(working copy)
@@ -26,37 +26,41 @@
 
 /** Utility class for URL analysis */
 public class URLUtil {
-  
+
   /**
-   * Resolve relative URL-s and fix a few java.net.URL errors
-   * in handling of URLs with embedded params and pure query
-   * targets.
-   * @param base base url
-   * @param target target url (may be relative)
+   * Resolve relative URL-s and fix a few java.net.URL errors in handling of
+   * URLs with embedded params and pure query targets.
+   * 
+   * @param base
+   *          base url
+   * @param target
+   *          target url (may be relative)
    * @return resolved absolute url.
    * @throws MalformedURLException
    */
   public static URL resolveURL(URL base, String target)
-          throws MalformedURLException {
+      throws MalformedURLException {
     target = target.trim();
 
-    /* this is probably not needed anymore - see NUTCH-797.
-    // handle params that are embedded into the base url - move them to target
-    // so URL class constructs the new url class properly
-    if (base.toString().indexOf(';') > 0)
-      return fixEmbeddedParams(base, target);
-    */
-    
+    /*
+     * this is probably not needed anymore - see NUTCH-797.
+     * // handle params that are embedded into the base url - move them to
+     * // target so URL class constructs the new url class properly
+     * if (base.toString().indexOf(';') > 0) return fixEmbeddedParams(base, target);
+     */
+
     // handle the case that there is a target that is a pure query,
     // for example
     // http://careers3.accenture.com/Careers/ASPX/Search.aspx?co=0&sk=0
     // It has urls in the page of the form href="?co=0&sk=0&pg=1", and by
     // default
     // URL constructs the base+target combo as
-    // http://careers3.accenture.com/Careers/ASPX/?co=0&sk=0&pg=1, incorrectly
+    // http://careers3.accenture.com/Careers/ASPX/?co=0&sk=0&pg=1,
+    // incorrectly
     // dropping the Search.aspx target
     //
-    // Browsers handle these just fine, they must have an exception similar to
+    // Browsers handle these just fine, they must have an exception similar
+    // to
     // this
     if (target.startsWith("?")) {
       return fixPureQueryTargets(base, target);
@@ -66,9 +70,10 @@
   }
 
   /** Handle the case in RFC3986 section 5.4.1 example 7, and similar. */
-   static URL fixPureQueryTargets(URL base, String target)
-          throws MalformedURLException {
-    if (!target.startsWith("?")) return new URL(base, target);
+  static URL fixPureQueryTargets(URL base, String target)
+      throws MalformedURLException {
+    if (!target.startsWith("?"))
+      return new URL(base, target);
 
     String basePath = base.getPath();
     String baseRightMost = "";
@@ -77,7 +82,8 @@
       baseRightMost = basePath.substring(baseRightMostIdx + 1);
     }
 
-    if (target.startsWith("?")) target = baseRightMost + target;
+    if (target.startsWith("?"))
+      target = baseRightMost + target;
 
     return new URL(base, target);
   }
@@ -103,7 +109,7 @@
    *           If the url is not a well formed URL.
    */
   private static URL fixEmbeddedParams(URL base, String target)
-          throws MalformedURLException {
+      throws MalformedURLException {
 
     // the target contains params information or the base doesn't then no
     // conversion necessary, return regular URL
@@ -116,12 +122,14 @@
     int startParams = baseURL.indexOf(';');
     String params = baseURL.substring(startParams);
 
-    // if the target has a query string then put the params information after
-    // any path but before the query string, otherwise just append to the path
+    // if the target has a query string then put the params information
+    // after
+    // any path but before the query string, otherwise just append to the
+    // path
     int startQS = target.indexOf('?');
     if (startQS >= 0) {
       target = target.substring(0, startQS) + params
-              + target.substring(startQS);
+          + target.substring(startQS);
     } else {
       target += params;
     }
@@ -129,125 +137,142 @@
     return new URL(base, target);
   }
 
-  private static Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");
+  private static Pattern IP_PATTERN = Pattern
+      .compile("(\\d{1,3}\\.){3}(\\d{1,3})");
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new URL(http://lucene.apache.org/))
    *  </code><br>
-   *  will return <br><code> apache.org</code>
-   *   */
+   * will return <br>
+   * <code> apache.org</code>
+   * */
   public static String getDomainName(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    //it seems that java returns hostnames ending with .
-    if(host.endsWith("."))
+    // it seems that java returns hostnames ending with .
+    if (host.endsWith("."))
       host = host.substring(0, host.length() - 1);
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return host;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1); 
-      if(tlds.isDomainSuffix(subCandidate)) {
-        return candidate; 
+      String subCandidate = candidate.substring(index + 1);
+      if (tlds.isDomainSuffix(subCandidate)) {
+        return candidate;
       }
       candidate = subCandidate;
     }
     return candidate;
   }
 
-  /** Returns the domain name of the url. The domain name of a url is
-   *  the substring of the url's hostname, w/o subdomain names. As an
-   *  example <br><code>
+  /**
+   * Returns the domain name of the url. The domain name of a url is the
+   * substring of the url's hostname, w/o subdomain names. As an example <br>
+   * <code>
    *  getDomainName(conf, new http://lucene.apache.org/)
    *  </code><br>
-   *  will return <br><code> apache.org</code>
+   * will return <br>
+   * <code> apache.org</code>
+   * 
    * @throws MalformedURLException
    */
   public static String getDomainName(String url) throws MalformedURLException {
     return getDomainName(new URL(url));
   }
 
-  /** Returns whether the given urls have the same domain name.
-   * As an example, <br>
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
    * <code> isSameDomain(new URL("http://lucene.apache.org")
    * , new URL("http://people.apache.org/"))
    * <br> will return true. </code>
-   *
+   * 
    * @return true if the domain names are equal
    */
   public static boolean isSameDomainName(URL url1, URL url2) {
     return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
   }
 
-  /**Returns whether the given urls have the same domain name.
-  * As an example, <br>
-  * <code> isSameDomain("http://lucene.apache.org"
-  * ,"http://people.apache.org/")
-  * <br> will return true. </code>
-  * @return true if the domain names are equal
-  * @throws MalformedURLException
-  */
+  /**
+   * Returns whether the given urls have the same domain name. As an example, <br>
+   * <code> isSameDomain("http://lucene.apache.org"
+   * ,"http://people.apache.org/")
+   * <br> will return true. </code>
+   * 
+   * @return true if the domain names are equal
+   * @throws MalformedURLException
+   */
   public static boolean isSameDomainName(String url1, String url2)
-    throws MalformedURLException {
+      throws MalformedURLException {
     return isSameDomainName(new URL(url1), new URL(url2));
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
   public static DomainSuffix getDomainSuffix(URL url) {
     DomainSuffixes tlds = DomainSuffixes.getInstance();
     String host = url.getHost();
-    if(IP_PATTERN.matcher(host).matches())
+    if (IP_PATTERN.matcher(host).matches())
       return null;
-    
+
     int index = 0;
     String candidate = host;
-    for(;index >= 0;) {
+    for (; index >= 0;) {
       index = candidate.indexOf('.');
-      String subCandidate = candidate.substring(index+1);
+      String subCandidate = candidate.substring(index + 1);
       DomainSuffix d = tlds.get(subCandidate);
-      if(d != null) {
-        return d; 
+      if (d != null) {
+        return d;
       }
       candidate = subCandidate;
     }
     return null;
   }
 
-  /** Returns the {@link DomainSuffix} corresponding to the
-   * last public part of the hostname
+  /**
+   * Returns the {@link DomainSuffix} corresponding to the last public part of
+   * the hostname
    */
-  public static DomainSuffix getDomainSuffix(String url) throws MalformedURLException {
+  public static DomainSuffix getDomainSuffix(String url)
+      throws MalformedURLException {
     return getDomainSuffix(new URL(url));
   }
 
-  /** Partitions of the hostname of the url by "."  */
+  /** Partitions of the hostname of the url by "." */
   public static String[] getHostSegments(URL url) {
     String host = url.getHost();
-    //return whole hostname, if it is an ipv4
-    //TODO : handle ipv6
-    if(IP_PATTERN.matcher(host).matches())
-      return new String[] {host};
+    // return whole hostname, if it is an ipv4
+    // TODO : handle ipv6
+    if (IP_PATTERN.matcher(host).matches())
+      return new String[] { host };
     return host.split("\\.");
   }
 
-  /** Partitions of the hostname of the url by "."
-   * @throws MalformedURLException */
-  public static String[] getHostSegments(String url) throws MalformedURLException {
-   return getHostSegments(new URL(url));
+  /**
+   * Partitions of the hostname of the url by "."
+   * 
+   * @throws MalformedURLException
+   */
+  public static String[] getHostSegments(String url)
+      throws MalformedURLException {
+    return getHostSegments(new URL(url));
   }
 
   /**
-   * <p>Given two urls, a src and a destination of a redirect, it returns the 
-   * representative url.<p>
+   * <p>
+   * Given two urls, a src and a destination of a redirect, it returns the
+   * representative url.
+   * <p>
    * 
-   * <p>This method implements an extended version of the algorithm used by the
+   * <p>
+   * This method implements an extended version of the algorithm used by the
    * Yahoo! Slurp crawler described here:<br>
    * <a href=
    * "http://help.yahoo.com/l/nz/yahooxtra/search/webcrawler/slurp-11.html"> How
@@ -255,46 +280,63 @@
    * <br>
    * <ol>
    * <li>Choose target url if either url is malformed.</li>
-   * <li>If different domains the keep the destination whether or not the 
+   * <li>If different domains the keep the destination whether or not the
    * redirect is temp or perm</li>
-   * <ul><li>a.com -> b.com*</li></ul>
+   * <ul>
+   * <li>a.com -> b.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and the source is root, keep the source.</li>
-   * <ul><li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li></ul>
-   * <li>If the redirect is permanent and the source is not root and the 
+   * <ul>
+   * <li>*a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html</li>
+   * </ul>
+   * <li>If the redirect is permanent and the source is not root and the
    * destination is root, keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is permanent and neither the source nor the destination
    * is root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com/abc/page.html*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
+   * </ul>
    * <li>If the redirect is temporary and source is root and destination is not
    * root, then keep the source</li>
-   * <ul><li>*a.com -> a.com/xyz/index.html</li></ul>
+   * <ul>
+   * <li>*a.com -> a.com/xyz/index.html</li>
+   * </ul>
    * <li>If the redirect is temporary and source is not root and destination is
    * root, then keep the destination</li>
-   * <ul><li>a.com/xyz/index.html -> a.com*</li></ul>
+   * <ul>
+   * <li>a.com/xyz/index.html -> a.com*</li>
+   * </ul>
    * <li>If the redirect is temporary and neither the source or the destination
-   * is root, then keep the shortest url.  First check for the shortest host,
-   * and if both are equal then check by path.  Path is first by length then by
-   * the number of / path separators.</li>
+   * is root, then keep the shortest url. First check for the shortest host, and
+   * if both are equal then check by path. Path is first by length then by the
+   * number of / path separators.</li>
    * <ul>
    * <li>a.com/xyz/index.html -> a.com/abc/page.html*</li>
    * <li>*www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html</li>
    * </ul>
    * <li>If the redirect is temporary and both the source and the destination
    * are root, then keep the shortest sub-domain</li>
-   * <ul><li>*www.a.com -> www.news.a.com</li></ul>
+   * <ul>
+   * <li>*www.a.com -> www.news.a.com</li>
+   * </ul>
    * <br>
-   * While not in this logic there is a further piece of representative url 
-   * logic that occurs during indexing and after scoring.  During creation of 
-   * the basic fields before indexing, if a url has a representative url stored
-   * we check both the url and its representative url (which should never be 
-   * the same) against their linkrank scores and the highest scoring one is 
-   * kept as the url and the lower scoring one is held as the orig url inside 
-   * of the index.
+   * While not in this logic there is a further piece of representative url
+   * logic that occurs during indexing and after scoring. During creation of the
+   * basic fields before indexing, if a url has a representative url stored we
+   * check both the url and its representative url (which should never be the
+   * same) against their linkrank scores and the highest scoring one is kept as
+   * the url and the lower scoring one is held as the orig url inside of the
+   * index.
    * 
-   * @param src The source url.
-   * @param dst The destination url.
-   * @param temp Is the redirect a temporary redirect.
+   * @param src
+   *          The source url.
+   * @param dst
+   *          The destination url.
+   * @param temp
+   *          Is the redirect a temporary redirect.
    * 
    * @return String The representative url.
    */
@@ -306,8 +348,7 @@
     try {
       srcUrl = new URL(src);
       dstUrl = new URL(dst);
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return dst;
     }
 
@@ -325,27 +366,28 @@
 
     // 1) different domain them keep dest, temp or perm
     // a.com -> b.com*
-    //    
+    //
     // 2) permanent and root, keep src
     // *a.com -> a.com?y=1 || *a.com -> a.com/xyz/index.html
-    //      
+    //
     // 3) permanent and not root and dest root, keep dest
     // a.com/xyz/index.html -> a.com*
-    //      
+    //
     // 4) permanent and neither root keep dest
     // a.com/xyz/index.html -> a.com/abc/page.html*
-    //      
+    //
     // 5) temp and root and dest not root keep src
     // *a.com -> a.com/xyz/index.html
-    //  
+    //
     // 7) temp and not root and dest root keep dest
     // a.com/xyz/index.html -> a.com*
-    //  
-    // 8) temp and neither root, keep shortest, if hosts equal by path else by
+    //
+    // 8) temp and neither root, keep shortest, if hosts equal by path else
+    // by
     // hosts. paths are first by length then by number of / separators
     // a.com/xyz/index.html -> a.com/abc/page.html*
     // *www.a.com/xyz/index.html -> www.news.a.com/xyz/index.html
-    //  
+    //
     // 9) temp and both root keep shortest sub domain
     // *www.a.com -> www.news.a.com
 
@@ -357,39 +399,35 @@
 
     // if it is a permanent redirect
     if (!temp) {
-      
+
       // if source is root return source, otherwise destination
       if (srcRoot) {
         return src;
-      }
-      else {
+      } else {
         return dst;
       }
-    }
-    else { // temporary redirect
+    } else { // temporary redirect
 
       // source root and destination not root
       if (srcRoot && !destRoot) {
         return src;
-      }
-      else if (!srcRoot && destRoot) { // destination root and source not
+      } else if (!srcRoot && destRoot) { // destination root and source
+        // not
         return dst;
-      }
-      else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
+      } else if (!srcRoot && !destRoot && (srcHost.equals(dstHost))) {
 
-        // source and destination hosts are the same, check paths, host length
+        // source and destination hosts are the same, check paths, host
+        // length
         int numSrcPaths = srcFile.split("/").length;
         int numDstPaths = dstFile.split("/").length;
         if (numSrcPaths != numDstPaths) {
           return (numDstPaths < numSrcPaths ? dst : src);
-        }
-        else {
+        } else {
           int srcPathLength = srcFile.length();
           int dstPathLength = dstFile.length();
           return (dstPathLength < srcPathLength ? dst : src);
         }
-      }
-      else {
+      } else {
 
         // different host names and both root take the shortest
         int numSrcSubs = srcHost.split("\\.").length;
@@ -403,51 +441,51 @@
    * Returns the lowercased hostname for the url or null if the url is not well
    * formed.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The hostname for the url.
    */
   public static String getHost(String url) {
     try {
       return new URL(url).getHost().toLowerCase();
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
 
   /**
-   * Returns the page for the url.  The page consists of the protocol, host,
-   * and path, but does not include the query string.  The host is lowercased
-   * but the path is not.
+   * Returns the page for the url. The page consists of the protocol, host, and
+   * path, but does not include the query string. The host is lowercased but the
+   * path is not.
    * 
-   * @param url The url to check.
+   * @param url
+   *          The url to check.
    * @return String The page for the url.
    */
   public static String getPage(String url) {
     try {
-      // get the full url, and replace the query string with and empty string
+      // get the full url, and replace the query string with an empty
+      // string
       url = url.toLowerCase();
       String queryStr = new URL(url).getQuery();
       return (queryStr != null) ? url.replace("?" + queryStr, "") : url;
-    }
-    catch (MalformedURLException e) {
+    } catch (MalformedURLException e) {
       return null;
     }
   }
-  
+
   /** For testing */
-  public static void main(String[] args){
-    
-    if(args.length!=1) {
+  public static void main(String[] args) {
+
+    if (args.length != 1) {
       System.err.println("Usage : URLUtil <url>");
-      return ;
+      return;
     }
-    
+
     String url = args[0];
     try {
       System.out.println(URLUtil.getDomainName(new URL(url)));
-    }
-    catch (MalformedURLException ex) {
+    } catch (MalformedURLException ex) {
       ex.printStackTrace();
     }
   }
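To make the documented behavior concrete, a minimal sketch using the example URLs from the Javadoc and comments above (the expected values are those the documentation describes):

    import java.net.URL;
    import org.apache.nutch.util.URLUtil;

    public class URLUtilExample {
      public static void main(String[] args) throws Exception {
        // pure query target: Search.aspx is kept rather than dropped
        URL base = new URL("http://careers3.accenture.com/Careers/ASPX/Search.aspx?co=0&sk=0");
        System.out.println(URLUtil.resolveURL(base, "?co=0&sk=0&pg=1"));

        // prints "apache.org" -- subdomains are stripped down to the domain name
        System.out.println(URLUtil.getDomainName(new URL("http://lucene.apache.org/")));

        // true -- both hosts share the domain name apache.org
        System.out.println(URLUtil.isSameDomainName("http://lucene.apache.org",
            "http://people.apache.org/"));
      }
    }
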
Index: src/java/org/apache/nutch/util/HadoopFSUtil.java
===================================================================
--- src/java/org/apache/nutch/util/HadoopFSUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/HadoopFSUtil.java	(working copy)
@@ -25,48 +25,48 @@
 
 public class HadoopFSUtil {
 
-    /**
-     * Returns PathFilter that passes all paths through.
-     */
-    public static PathFilter getPassAllFilter() {
-        return new PathFilter() {
-            public boolean accept(Path arg0) {
-                return true;
-            }
-        };
-    }
+  /**
+   * Returns PathFilter that passes all paths through.
+   */
+  public static PathFilter getPassAllFilter() {
+    return new PathFilter() {
+      public boolean accept(Path arg0) {
+        return true;
+      }
+    };
+  }
 
-    /**
-     * Returns PathFilter that passes directories through.
-     */
-    public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
-        return new PathFilter() {
-            public boolean accept(final Path path) {
-                try {
-                    return fs.getFileStatus(path).isDir();
-                } catch (IOException ioe) {
-                    return false;
-                }
-            }
+  /**
+   * Returns PathFilter that passes directories through.
+   */
+  public static PathFilter getPassDirectoriesFilter(final FileSystem fs) {
+    return new PathFilter() {
+      public boolean accept(final Path path) {
+        try {
+          return fs.getFileStatus(path).isDir();
+        } catch (IOException ioe) {
+          return false;
+        }
+      }
 
-        };
+    };
+  }
+
+  /**
+   * Turns an array of FileStatus into an array of Paths.
+   */
+  public static Path[] getPaths(FileStatus[] stats) {
+    if (stats == null) {
+      return null;
     }
-    
-    /**
-     * Turns an array of FileStatus into an array of Paths.
-     */
-    public static Path[] getPaths(FileStatus[] stats) {
-      if (stats == null) {
-        return null;
-      }
-      if (stats.length == 0) {
-        return new Path[0];
-      }
-      Path[] res = new Path[stats.length];
-      for (int i = 0; i < stats.length; i++) {
-        res[i] = stats[i].getPath();
-      }
-      return res;
+    if (stats.length == 0) {
+      return new Path[0];
     }
+    Path[] res = new Path[stats.length];
+    for (int i = 0; i < stats.length; i++) {
+      res[i] = stats[i].getPath();
+    }
+    return res;
+  }
 
 }
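A typical use of these helpers is listing the sub-directories of a segments directory and unwrapping the statuses into Paths. A minimal sketch against the old FileSystem API used throughout this patch (the directory name is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.nutch.util.HadoopFSUtil;

    public class HadoopFSUtilExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // only directories pass the filter; getPaths() unwraps the FileStatus array
        FileStatus[] stats = fs.listStatus(new Path("crawl/segments"),
            HadoopFSUtil.getPassDirectoriesFilter(fs));
        Path[] segments = HadoopFSUtil.getPaths(stats);
        if (segments != null) {
          for (Path segment : segments) {
            System.out.println(segment);
          }
        }
      }
    }
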
Index: src/java/org/apache/nutch/util/StringUtil.java
===================================================================
--- src/java/org/apache/nutch/util/StringUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/StringUtil.java	(working copy)
@@ -18,42 +18,42 @@
 package org.apache.nutch.util;
 
 /**
- * A collection of String processing utility methods. 
+ * A collection of String processing utility methods.
  */
 public class StringUtil {
 
   /**
-   * Returns a copy of <code>s</code> padded with trailing spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with trailing spaces so that it's
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String rightPad(String s, int length) {
-    StringBuffer sb= new StringBuffer(s);
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer(s);
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     return sb.toString();
   }
 
   /**
-   * Returns a copy of <code>s</code> padded with leading spaces so
-   * that it's length is <code>length</code>.  Strings already
-   * <code>length</code> characters long or longer are not altered.
+   * Returns a copy of <code>s</code> padded with leading spaces so that it's
+   * length is <code>length</code>. Strings already <code>length</code>
+   * characters long or longer are not altered.
    */
   public static String leftPad(String s, int length) {
-    StringBuffer sb= new StringBuffer();
-    for (int i= length - s.length(); i > 0; i--) 
+    StringBuffer sb = new StringBuffer();
+    for (int i = length - s.length(); i > 0; i--)
       sb.append(" ");
     sb.append(s);
     return sb.toString();
   }
 
+  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6',
+      '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
 
-  private static final char[] HEX_DIGITS =
-  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
-
   /**
    * Convenience call for {@link #toHexString(byte[], String, int)}, where
    * <code>sep = null; lineLen = Integer.MAX_VALUE</code>.
+   * 
    * @param buf
    */
   public static String toHexString(byte[] buf) {
@@ -63,37 +63,48 @@
   /**
    * Get a text representation of a byte[] as hexadecimal String, where each
    * pair of hexadecimal digits corresponds to consecutive bytes in the array.
-   * @param buf input data
-   * @param sep separate every pair of hexadecimal digits with this separator, or
-   * null if no separation is needed.
-   * @param lineLen break the output String into lines containing output for lineLen
-   * bytes.
+   * 
+   * @param buf
+   *          input data
+   * @param sep
+   *          separate every pair of hexadecimal digits with this separator, or
+   *          null if no separation is needed.
+   * @param lineLen
+   *          break the output String into lines containing output for lineLen
+   *          bytes.
    */
   public static String toHexString(byte[] buf, String sep, int lineLen) {
-    if (buf == null) return null;
-    if (lineLen <= 0) lineLen = Integer.MAX_VALUE;
+    if (buf == null)
+      return null;
+    if (lineLen <= 0)
+      lineLen = Integer.MAX_VALUE;
     StringBuffer res = new StringBuffer(buf.length * 2);
     for (int i = 0; i < buf.length; i++) {
       int b = buf[i];
       res.append(HEX_DIGITS[(b >> 4) & 0xf]);
       res.append(HEX_DIGITS[b & 0xf]);
-      if (i > 0 && (i % lineLen) == 0) res.append('\n');
-      else if (sep != null && i < lineLen - 1) res.append(sep); 
+      if (i > 0 && (i % lineLen) == 0)
+        res.append('\n');
+      else if (sep != null && i < lineLen - 1)
+        res.append(sep);
     }
     return res.toString();
   }
-  
+
   /**
    * Convert a String containing consecutive (no inside whitespace) hexadecimal
-   * digits into a corresponding byte array. If the number of digits is not even,
-   * a '0' will be appended in the front of the String prior to conversion.
-   * Leading and trailing whitespace is ignored.
-   * @param text input text
+   * digits into a corresponding byte array. If the number of digits is not
+   * even, a '0' will be prepended to the front of the String prior to
+   * conversion. Leading and trailing whitespace is ignored.
+   * 
+   * @param text
+   *          input text
    * @return converted byte array, or null if unable to convert
    */
   public static byte[] fromHexString(String text) {
     text = text.trim();
-    if (text.length() % 2 != 0) text = "0" + text;
+    if (text.length() % 2 != 0)
+      text = "0" + text;
     int resLen = text.length() / 2;
     int loNibble, hiNibble;
     byte[] res = new byte[resLen];
@@ -101,12 +112,13 @@
       int j = i << 1;
       hiNibble = charToNibble(text.charAt(j));
       loNibble = charToNibble(text.charAt(j + 1));
-      if (loNibble == -1 || hiNibble == -1) return null;
-      res[i] = (byte)(hiNibble << 4 | loNibble);
+      if (loNibble == -1 || hiNibble == -1)
+        return null;
+      res[i] = (byte) (hiNibble << 4 | loNibble);
     }
     return res;
   }
-  
+
   private static final int charToNibble(char c) {
     if (c >= '0' && c <= '9') {
       return c - '0';
@@ -129,8 +141,8 @@
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
-    else 
-      System.out.println(args[0] + " is resolved to " +
-                         EncodingDetector.resolveEncodingAlias(args[0]));
+    else
+      System.out.println(args[0] + " is resolved to "
+          + EncodingDetector.resolveEncodingAlias(args[0]));
   }
 }
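The hex helpers above are used elsewhere in this patch for page signatures. A minimal expression-level sketch of the round trip (values chosen for illustration):

    byte[] sig = new byte[] { (byte) 0xde, (byte) 0xad, (byte) 0xbe, (byte) 0xef };
    String hex = StringUtil.toHexString(sig);       // "deadbeef"
    byte[] back = StringUtil.fromHexString(hex);    // the original four bytes
    byte[] odd = StringUtil.fromHexString("abc");   // treated as "0abc" -> { 0x0a, (byte) 0xbc }
    String padded = StringUtil.rightPad("id", 8);   // "id" followed by six spaces
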
Index: src/java/org/apache/nutch/util/CommandRunner.java
===================================================================
--- src/java/org/apache/nutch/util/CommandRunner.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/CommandRunner.java	(working copy)
@@ -82,11 +82,11 @@
   }
 
   public void evaluate() throws IOException {
-      this.exec();
+    this.exec();
   }
 
   /**
-   *
+   * 
    * @return process exit value (return code) or -1 if timed out.
    * @throws IOException
    */
@@ -94,13 +94,11 @@
     Process proc = Runtime.getRuntime().exec(_command);
     _barrier = new CyclicBarrier(3 + ((_stdin != null) ? 1 : 0));
 
-    PullerThread so =
-      new PullerThread("STDOUT", proc.getInputStream(), _stdout);
+    PullerThread so = new PullerThread("STDOUT", proc.getInputStream(), _stdout);
     so.setDaemon(true);
     so.start();
 
-    PullerThread se =
-      new PullerThread("STDERR", proc.getErrorStream(), _stderr);
+    PullerThread se = new PullerThread("STDERR", proc.getErrorStream(), _stderr);
     se.setDaemon(true);
     se.start();
 
@@ -145,11 +143,12 @@
             Thread.sleep(1000);
             _xit = proc.exitValue();
           } catch (InterruptedException ie) {
-              if (Thread.interrupted()) {
-                  break; // stop waiting on an interrupt for this thread
-              } else {
-                  continue;
-              }
+            if (Thread.interrupted()) {
+              break; // stop waiting on an interrupt for this
+              // thread
+            } else {
+              continue;
+            }
           } catch (IllegalThreadStateException iltse) {
             continue;
           }
@@ -181,11 +180,8 @@
 
     private boolean _closeInput;
 
-    protected PumperThread(
-      String name,
-      InputStream is,
-      OutputStream os,
-      boolean closeInput) {
+    protected PumperThread(String name, InputStream is, OutputStream os,
+        boolean closeInput) {
       super(name);
       _is = is;
       _os = os;
@@ -218,12 +214,12 @@
         }
       }
       try {
-         _barrier.await();
-       } catch (InterruptedException ie) {
-         /* IGNORE */
-       } catch (BrokenBarrierException bbe) {
-         /* IGNORE */
-       }
+        _barrier.await();
+      } catch (InterruptedException ie) {
+        /* IGNORE */
+      } catch (BrokenBarrierException bbe) {
+        /* IGNORE */
+      }
     }
   }
 
@@ -269,8 +265,9 @@
 
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("-timeout")) {
-        timeout = Integer.parseInt(args[++i]);;
-      } else if (i != args.length-2) {
+        timeout = Integer.parseInt(args[++i]);
+        ;
+      } else if (i != args.length - 2) {
         System.err.println(usage);
         System.exit(-1);
       } else {
@@ -290,6 +287,6 @@
 
     cr.evaluate();
 
-    System.err.println("output value: "+cr.getExitValue());
+    System.err.println("output value: " + cr.getExitValue());
   }
 }
Index: src/java/org/apache/nutch/util/NutchConfiguration.java
===================================================================
--- src/java/org/apache/nutch/util/NutchConfiguration.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/NutchConfiguration.java	(working copy)
@@ -23,37 +23,42 @@
 
 import org.apache.hadoop.conf.Configuration;
 
-
-/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
- * resources.  */
+/**
+ * Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.
+ */
 public class NutchConfiguration {
   public static final String UUID_KEY = "nutch.conf.uuid";
-  
-  private NutchConfiguration() {}                 // singleton
-  
+
+  private NutchConfiguration() {
+  } // singleton
+
   /*
-   * Configuration.hashCode() doesn't return values that
-   * correspond to a unique set of parameters. This is a workaround
-   * so that we can track instances of Configuration created by Nutch.
+   * Configuration.hashCode() doesn't return values that correspond to a unique
+   * set of parameters. This is a workaround so that we can track instances of
+   * Configuration created by Nutch.
    */
   private static void setUUID(Configuration conf) {
     UUID uuid = UUID.randomUUID();
     conf.set(UUID_KEY, uuid.toString());
   }
-  
+
   /**
-   * Retrieve a Nutch UUID of this configuration object, or null
-   * if the configuration was created elsewhere.
-   * @param conf configuration instance
+   * Retrieve a Nutch UUID of this configuration object, or null if the
+   * configuration was created elsewhere.
+   * 
+   * @param conf
+   *          configuration instance
    * @return uuid or null
    */
   public static String getUUID(Configuration conf) {
     return conf.get(UUID_KEY);
   }
 
-  /** Create a {@link Configuration} for Nutch. This will load the standard
-   * Nutch resources, <code>nutch-default.xml</code> and
-   * <code>nutch-site.xml</code> overrides.
+  /**
+   * Create a {@link Configuration} for Nutch. This will load the standard Nutch
+   * resources, <code>nutch-default.xml</code> and <code>nutch-site.xml</code>
+   * overrides.
    */
   public static Configuration create() {
     Configuration conf = new Configuration();
@@ -61,14 +66,19 @@
     addNutchResources(conf);
     return conf;
   }
-  
-  /** Create a {@link Configuration} from supplied properties.
-   * @param addNutchResources if true, then first <code>nutch-default.xml</code>,
-   * and then <code>nutch-site.xml</code> will be loaded prior to applying the
-   * properties. Otherwise these resources won't be used.
-   * @param nutchProperties a set of properties to define (or override)
+
+  /**
+   * Create a {@link Configuration} from supplied properties.
+   * 
+   * @param addNutchResources
+   *          if true, then first <code>nutch-default.xml</code>, and then
+   *          <code>nutch-site.xml</code> will be loaded prior to applying the
+   *          properties. Otherwise these resources won't be used.
+   * @param nutchProperties
+   *          a set of properties to define (or override)
    */
-  public static Configuration create(boolean addNutchResources, Properties nutchProperties) {
+  public static Configuration create(boolean addNutchResources,
+      Properties nutchProperties) {
     Configuration conf = new Configuration();
     setUUID(conf);
     if (addNutchResources) {
@@ -83,8 +93,8 @@
   /**
    * Add the standard Nutch resources to {@link Configuration}.
    * 
-   * @param conf               Configuration object to which
-   *                           configuration is to be added.
+   * @param conf
+   *          Configuration object to which configuration is to be added.
    */
   private static Configuration addNutchResources(Configuration conf) {
     conf.addResource("nutch-default.xml");
@@ -92,4 +102,3 @@
     return conf;
   }
 }
-
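The two create() variants above differ only in whether nutch-default.xml and nutch-site.xml are loaded before the supplied properties are applied. A minimal sketch (the property value is illustrative):

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.util.NutchConfiguration;

    public class NutchConfigurationExample {
      public static void main(String[] args) {
        Properties overrides = new Properties();
        overrides.setProperty("http.agent.name", "my-test-crawler");  // illustrative value
        // load the standard Nutch resources first, then apply the overrides
        Configuration conf = NutchConfiguration.create(true, overrides);
        // every Configuration created by Nutch carries a tracking UUID
        System.out.println(NutchConfiguration.getUUID(conf));
      }
    }
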
Index: src/java/org/apache/nutch/util/NutchJob.java
===================================================================
--- src/java/org/apache/nutch/util/NutchJob.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/NutchJob.java	(working copy)
@@ -20,7 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 
-/** A {@link JobConf} for Nutch jobs.  */
+/** A {@link JobConf} for Nutch jobs. */
 public class NutchJob extends JobConf {
 
   public NutchJob(Configuration conf) {
@@ -28,4 +28,3 @@
   }
 
 }
-
Index: src/java/org/apache/nutch/util/DomUtil.java
===================================================================
--- src/java/org/apache/nutch/util/DomUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/DomUtil.java	(working copy)
@@ -38,7 +38,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 public class DomUtil {
 
   private final static Logger LOG = LoggerFactory.getLogger(DomUtil.class);
@@ -61,10 +60,10 @@
       input.setEncoding("UTF-8");
       parser.parse(input);
       int i = 0;
-      while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
-       i++;
-      } 
-      element = (Element)parser.getDocument().getChildNodes().item(i);
+      while (!(parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+        i++;
+      }
+      element = (Element) parser.getDocument().getChildNodes().item(i);
     } catch (FileNotFoundException e) {
       e.printStackTrace(LogUtil.getWarnStream(LOG));
     } catch (SAXException e) {
Index: src/java/org/apache/nutch/util/LogUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LogUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/LogUtil.java	(working copy)
@@ -26,10 +26,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
  * Utility class for logging.
- *
+ * 
  * @author J&eacute;r&ocirc;me Charron
  */
 public class LogUtil {
@@ -38,8 +37,8 @@
 
   private static Method TRACE = null;
   private static Method DEBUG = null;
-  private static Method INFO  = null;
-  private static Method WARN  = null;
+  private static Method INFO = null;
+  private static Method WARN = null;
   private static Method ERROR = null;
   private static Method FATAL = null;
 
@@ -47,18 +46,17 @@
     try {
       TRACE = Logger.class.getMethod("trace", new Class[] { String.class });
       DEBUG = Logger.class.getMethod("debug", new Class[] { String.class });
-      INFO  = Logger.class.getMethod("info",  new Class[] { String.class });
-      WARN  = Logger.class.getMethod("warn",  new Class[] { String.class });
+      INFO = Logger.class.getMethod("info", new Class[] { String.class });
+      WARN = Logger.class.getMethod("warn", new Class[] { String.class });
       ERROR = Logger.class.getMethod("error", new Class[] { String.class });
       FATAL = Logger.class.getMethod("error", new Class[] { String.class });
-    } catch(Exception e) {
+    } catch (Exception e) {
       if (LOG.isErrorEnabled()) {
         LOG.error("Cannot init log methods", e);
       }
     }
   }
-  
-  
+
   public static PrintStream getTraceStream(final Logger logger) {
     return getLogStream(logger, TRACE);
   }
@@ -70,7 +68,7 @@
   public static PrintStream getInfoStream(final Logger logger) {
     return getLogStream(logger, INFO);
   }
-  
+
   public static PrintStream getWarnStream(final Logger logger) {
     return getLogStream(logger, WARN);
   }
@@ -82,34 +80,35 @@
   public static PrintStream getFatalStream(final Logger logger) {
     return getLogStream(logger, FATAL);
   }
-  
+
   /** Returns a stream that, when written to, adds log lines. */
-  private static PrintStream getLogStream(final Logger logger, final Method method) {
+  private static PrintStream getLogStream(final Logger logger,
+      final Method method) {
     return new PrintStream(new ByteArrayOutputStream() {
-        private int scan = 0;
+      private int scan = 0;
 
-        private boolean hasNewline() {
-          for (; scan < count; scan++) {
-            if (buf[scan] == '\n')
-              return true;
-          }
-          return false;
+      private boolean hasNewline() {
+        for (; scan < count; scan++) {
+          if (buf[scan] == '\n')
+            return true;
         }
+        return false;
+      }
 
-        public void flush() throws IOException {
-          if (!hasNewline())
-            return;
-          try {
-            method.invoke(logger, new String[] { toString().trim() });
-          } catch (Exception e) {
-            if (LOG.isErrorEnabled()) {
-              LOG.error("Cannot log with method [" + method + "]", e);
-            }
+      public void flush() throws IOException {
+        if (!hasNewline())
+          return;
+        try {
+          method.invoke(logger, new String[] { toString().trim() });
+        } catch (Exception e) {
+          if (LOG.isErrorEnabled()) {
+            LOG.error("Cannot log with method [" + method + "]", e);
           }
-          reset();
-          scan = 0;
         }
-      }, true);
+        reset();
+        scan = 0;
+      }
+    }, true);
   }
 
 }
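The getXxxStream() helpers above wrap a Logger in a PrintStream; this is how the rest of this patch routes stack traces into the log at a chosen level. A minimal sketch:

    import org.apache.nutch.util.LogUtil;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class LogUtilExample {
      private static final Logger LOG = LoggerFactory.getLogger(LogUtilExample.class);

      public static void main(String[] args) {
        try {
          throw new IllegalStateException("boom");  // illustrative failure
        } catch (Exception e) {
          // the stack trace is written to the logger at WARN level
          e.printStackTrace(LogUtil.getWarnStream(LOG));
        }
      }
    }
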
Index: src/java/org/apache/nutch/util/EncodingDetector.java
===================================================================
--- src/java/org/apache/nutch/util/EncodingDetector.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/EncodingDetector.java	(working copy)
@@ -40,27 +40,26 @@
 
 /**
  * A simple class for detecting character encodings.
- *
+ * 
  * <p>
  * Broadly this encompasses two functions, which are distinctly separate:
- *
+ * 
  * <ol>
- *  <li>Auto detecting a set of "clues" from input text.</li>
- *  <li>Taking a set of clues and making a "best guess" as to the
- *      "real" encoding.</li>
+ * <li>Auto detecting a set of "clues" from input text.</li>
+ * <li>Taking a set of clues and making a "best guess" as to the "real"
+ * encoding.</li>
  * </ol>
  * </p>
- *
+ * 
  * <p>
- * A caller will often have some extra information about what the
- * encoding might be (e.g. from the HTTP header or HTML meta-tags, often
- * wrong but still potentially useful clues). The types of clues may differ
- * from caller to caller. Thus a typical calling sequence is:
+ * A caller will often have some extra information about what the encoding might
+ * be (e.g. from the HTTP header or HTML meta-tags, often wrong but still
+ * potentially useful clues). The types of clues may differ from caller to
+ * caller. Thus a typical calling sequence is:
  * <ul>
- *    <li>Run step (1) to generate a set of auto-detected clues;</li>
- *    <li>Combine these clues with the caller-dependent "extra clues"
- *        available;</li>
- *    <li>Run step (2) to guess what the most probable answer is.</li>
+ * <li>Run step (1) to generate a set of auto-detected clues;</li>
+ * <li>Combine these clues with the caller-dependent "extra clues" available;</li>
+ * <li>Run step (2) to guess what the most probable answer is.</li>
  * </p>
  */
 public class EncodingDetector {
@@ -90,34 +89,32 @@
     }
 
     public String toString() {
-      return value + " (" + source +
-           ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
+      return value + " (" + source
+          + ((confidence >= 0) ? ", " + confidence + "% confidence" : "") + ")";
     }
 
     public boolean isEmpty() {
-      return (value==null || "".equals(value));
+      return (value == null || "".equals(value));
     }
 
     public boolean meetsThreshold() {
-      return (confidence < 0 ||
-               (minConfidence >= 0 && confidence >= minConfidence));
+      return (confidence < 0 || (minConfidence >= 0 && confidence >= minConfidence));
     }
   }
 
-  public static final Logger LOG = LoggerFactory.getLogger(EncodingDetector.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(EncodingDetector.class);
 
   public static final int NO_THRESHOLD = -1;
 
-  public static final String MIN_CONFIDENCE_KEY =
-    "encodingdetector.charset.min.confidence";
+  public static final String MIN_CONFIDENCE_KEY = "encodingdetector.charset.min.confidence";
 
-  private static final HashMap<String, String> ALIASES =
-    new HashMap<String, String>();
+  private static final HashMap<String, String> ALIASES = new HashMap<String, String>();
 
   private static final HashSet<String> DETECTABLES = new HashSet<String>();
 
   // CharsetDetector will die without a minimum amount of data.
-  private static final int MIN_LENGTH=4;
+  private static final int MIN_LENGTH = 4;
 
   static {
     DETECTABLES.add("text/html");
@@ -130,23 +127,22 @@
     DETECTABLES.add("application/rss+xml");
     DETECTABLES.add("application/xhtml+xml");
     /*
-     * the following map is not an alias mapping table, but
-     * maps character encodings which are often used in mislabelled
-     * documents to their correct encodings. For instance,
-     * there are a lot of documents labelled 'ISO-8859-1' which contain
-     * characters not covered by ISO-8859-1 but covered by windows-1252.
-     * Because windows-1252 is a superset of ISO-8859-1 (sharing code points
-     * for the common part), it's better to treat ISO-8859-1 as
-     * synonymous with windows-1252 than to reject, as invalid, documents
-     * labelled as ISO-8859-1 that have characters outside ISO-8859-1.
+     * the following map is not an alias mapping table, but maps character
+     * encodings which are often used in mislabelled documents to their correct
+     * encodings. For instance, there are a lot of documents labelled
+     * 'ISO-8859-1' which contain characters not covered by ISO-8859-1 but
+     * covered by windows-1252. Because windows-1252 is a superset of ISO-8859-1
+     * (sharing code points for the common part), it's better to treat
+     * ISO-8859-1 as synonymous with windows-1252 than to reject, as invalid,
+     * documents labelled as ISO-8859-1 that have characters outside ISO-8859-1.
      */
     ALIASES.put("ISO-8859-1", "windows-1252");
     ALIASES.put("EUC-KR", "x-windows-949");
     ALIASES.put("x-EUC-CN", "GB18030");
     ALIASES.put("GBK", "GB18030");
-    //ALIASES.put("Big5", "Big5HKSCS");
-    //ALIASES.put("TIS620", "Cp874");
-    //ALIASES.put("ISO-8859-11", "Cp874");
+    // ALIASES.put("Big5", "Big5HKSCS");
+    // ALIASES.put("TIS620", "Cp874");
+    // ALIASES.put("ISO-8859-11", "Cp874");
 
   }
 
@@ -190,8 +186,9 @@
     }
 
     // add character encoding coming from HTTP response header
-    addClue(parseCharacterEncoding(
-        content.getMetadata().get(Response.CONTENT_TYPE)), "header");
+    addClue(
+        parseCharacterEncoding(content.getMetadata().get(Response.CONTENT_TYPE)),
+        "header");
   }
 
   public void addClue(String value, String source, int confidence) {
@@ -210,21 +207,23 @@
 
   /**
    * Guess the encoding with the previously specified list of clues.
-   *
-   * @param content Content instance
-   * @param defaultValue Default encoding to return if no encoding can be
-   * detected with enough confidence. Note that this will <b>not</b> be
-   * normalized with {@link EncodingDetector#resolveEncodingAlias}
-   *
+   * 
+   * @param content
+   *          Content instance
+   * @param defaultValue
+   *          Default encoding to return if no encoding can be detected with
+   *          enough confidence. Note that this will <b>not</b> be normalized
+   *          with {@link EncodingDetector#resolveEncodingAlias}
+   * 
    * @return Guessed encoding or defaultValue
    */
   public String guessEncoding(Content content, String defaultValue) {
     /*
-     * This algorithm could be replaced by something more sophisticated;
-     * ideally we would gather a bunch of data on where various clues
-     * (autodetect, HTTP headers, HTML meta tags, etc.) disagree, tag each with
-     * the correct answer, and use machine learning/some statistical method
-     * to generate a better heuristic.
+     * This algorithm could be replaced by something more sophisticated; ideally
+     * we would gather a bunch of data on where various clues (autodetect, HTTP
+     * headers, HTML meta tags, etc.) disagree, tag each with the correct
+     * answer, and use machine learning/some statistical method to generate a
+     * better heuristic.
      */
 
     String base = content.getBaseUrl();
@@ -234,10 +233,9 @@
     }
 
     /*
-     * Go down the list of encoding "clues". Use a clue if:
-     *  1. Has a confidence value which meets our confidence threshold, OR
-     *  2. Doesn't meet the threshold, but is the best try,
-     *     since nothing else is available.
+     * Go down the list of encoding "clues". Use a clue if: 1. Has a confidence
+     * value which meets our confidence threshold, OR 2. Doesn't meet the
+     * threshold, but is the best try, since nothing else is available.
      */
     EncodingClue defaultClue = new EncodingClue(defaultValue, "default");
     EncodingClue bestClue = defaultClue;
@@ -249,8 +247,8 @@
       String charset = clue.value;
       if (minConfidence >= 0 && clue.confidence >= minConfidence) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace(base + ": Choosing encoding: " + charset +
-                    " with confidence " + clue.confidence);
+          LOG.trace(base + ": Choosing encoding: " + charset
+              + " with confidence " + clue.confidence);
         }
         return resolveEncodingAlias(charset).toLowerCase();
       } else if (clue.confidence == NO_THRESHOLD && bestClue == defaultClue) {
@@ -270,10 +268,10 @@
   }
 
   /*
-   * Strictly for analysis, look for "disagreements." The top guess from
-   * each source is examined; if these meet the threshold and disagree, then
-   * we log the information -- useful for testing or generating training data
-   * for a better heuristic.
+   * Strictly for analysis, look for "disagreements." The top guess from each
+   * source is examined; if these meet the threshold and disagree, then we log
+   * the information -- useful for testing or generating training data for a
+   * better heuristic.
    */
   private void findDisagreements(String url, List<EncodingClue> newClues) {
     HashSet<String> valsSeen = new HashSet<String>();
@@ -295,9 +293,9 @@
     if (disagreement) {
       // dump all values in case of disagreement
       StringBuffer sb = new StringBuffer();
-      sb.append("Disagreement: "+url+"; ");
+      sb.append("Disagreement: " + url + "; ");
       for (int i = 0; i < newClues.size(); i++) {
-        if (i>0) {
+        if (i > 0) {
           sb.append(", ");
         }
         sb.append(newClues.get(i));
@@ -312,7 +310,7 @@
         return null;
       String canonicalName = new String(Charset.forName(encoding).name());
       return ALIASES.containsKey(canonicalName) ? ALIASES.get(canonicalName)
-                                                : canonicalName;
+          : canonicalName;
     } catch (Exception e) {
       LOG.warn("Invalid encoding " + encoding + " detected, using default.");
       return null;
@@ -320,14 +318,14 @@
   }
 
   /**
-   * Parse the character encoding from the specified content type header.
-   * If the content type is null, or there is no explicit character encoding,
-   * <code>null</code> is returned.
-   * <br />
-   * This method was copied from org.apache.catalina.util.RequestUtil,
-   * which is licensed under the Apache License, Version 2.0 (the "License").
-   *
-   * @param contentType a content type header
+   * Parse the character encoding from the specified content type header. If the
+   * content type is null, or there is no explicit character encoding,
+   * <code>null</code> is returned. <br />
+   * This method was copied from org.apache.catalina.util.RequestUtil, which is
+   * licensed under the Apache License, Version 2.0 (the "License").
+   * 
+   * @param contentType
+   *          a content type header
    */
   public static String parseCharacterEncoding(String contentType) {
     if (contentType == null)
@@ -341,7 +339,7 @@
       encoding = encoding.substring(0, end);
     encoding = encoding.trim();
     if ((encoding.length() > 2) && (encoding.startsWith("\""))
-      && (encoding.endsWith("\"")))
+        && (encoding.endsWith("\"")))
       encoding = encoding.substring(1, encoding.length() - 1);
     return (encoding.trim());
 
@@ -354,12 +352,12 @@
     }
 
     Configuration conf = NutchConfiguration.create();
-    EncodingDetector detector =
-      new EncodingDetector(NutchConfiguration.create());
+    EncodingDetector detector = new EncodingDetector(
+        NutchConfiguration.create());
 
     // do everything as bytes; don't want any conversion
-    BufferedInputStream istr =
-      new BufferedInputStream(new FileInputStream(args[0]));
+    BufferedInputStream istr = new BufferedInputStream(new FileInputStream(
+        args[0]));
     ByteArrayOutputStream ostr = new ByteArrayOutputStream();
     byte[] bytes = new byte[1000];
     boolean more = true;
@@ -378,8 +376,8 @@
     byte[] data = ostr.toByteArray();
 
     // make a fake Content
-    Content content =
-      new Content("", "", data, "text/html", new Metadata(), conf);
+    Content content = new Content("", "", data, "text/html", new Metadata(),
+        conf);
 
     detector.autoDetectClues(content, true);
     String encoding = detector.guessEncoding(content,
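Putting the calling sequence from the class Javadoc together (auto-detect clues, add caller-supplied clues, then guess), a minimal sketch; the Content construction mirrors the fake Content built in main() above, while the payload and URLs are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.metadata.Metadata;
    import org.apache.nutch.protocol.Content;
    import org.apache.nutch.util.EncodingDetector;
    import org.apache.nutch.util.NutchConfiguration;

    public class EncodingDetectorExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        byte[] data = "<html><body>caf\u00e9</body></html>".getBytes("windows-1252");
        Content content = new Content("http://example.com/", "http://example.com/",
            data, "text/html", new Metadata(), conf);

        EncodingDetector detector = new EncodingDetector(conf);
        detector.autoDetectClues(content, true);   // clues sniffed from the raw bytes
        detector.addClue("ISO-8859-1", "header");  // e.g. taken from the Content-Type header
        System.out.println(detector.guessEncoding(content, "windows-1252"));
      }
    }
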
Index: src/java/org/apache/nutch/util/DeflateUtils.java
===================================================================
--- src/java/org/apache/nutch/util/DeflateUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/DeflateUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on deflated data.
+ * A collection of utility methods for working on deflated data.
  */
 public class DeflateUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(DeflateUtils.class);
   private static final int EXPECTED_COMPRESSION_RATIO = 5;
   private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an inflated copy of the input array.  If the deflated 
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns an inflated copy of the input array. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in) {
     return inflateBestEffort(in, Integer.MAX_VALUE);
@@ -48,38 +47,37 @@
 
   /**
    * Returns an inflated copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the deflated input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * inflate as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the deflated input has been
+   * truncated or corrupted, a best-effort attempt is made to inflate as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
     // "true" because HTTP does not provide zlib headers
     Inflater inflater = new Inflater(true);
-    InflaterInputStream inStream = 
-      new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in), inflater);
 
     byte[] buf = new byte[BUF_SIZE];
     int written = 0;
     while (true) {
       try {
-	int size = inStream.read(buf);
-	if (size <= 0) 
-	  break;
-	if ((written + size) > sizeLimit) {
-	  outStream.write(buf, 0, sizeLimit - written);
-	  break;
-	}
-	outStream.write(buf, 0, size);
-	written+= size;
+        int size = inStream.read(buf);
+        if (size <= 0)
+          break;
+        if ((written + size) > sizeLimit) {
+          outStream.write(buf, 0, sizeLimit - written);
+          break;
+        }
+        outStream.write(buf, 0, size);
+        written += size;
       } catch (Exception e) {
-	LOG.info( "Caught Exception in inflateBestEffort" );
+        LOG.info("Caught Exception in inflateBestEffort");
         e.printStackTrace(LogUtil.getWarnStream(LOG));
-	break;
+        break;
       }
     }
     try {
@@ -90,23 +88,24 @@
     return outStream.toByteArray();
   }
 
-
   /**
-   * Returns an inflated copy of the input array.  
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an inflated copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] inflate(byte[] in) throws IOException {
-    // decompress using InflaterInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using InflaterInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    InflaterInputStream inStream = 
-      new InflaterInputStream ( new ByteArrayInputStream(in) );
+    InflaterInputStream inStream = new InflaterInputStream(
+        new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -119,9 +118,9 @@
    * Returns a deflated copy of the input array.
    */
   public static final byte[] deflate(byte[] in) {
-    // compress using DeflaterOutputStream 
-    ByteArrayOutputStream byteOut = 
-      new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+    // compress using DeflaterOutputStream
+    ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+        / EXPECTED_COMPRESSION_RATIO);
 
     DeflaterOutputStream outStream = new DeflaterOutputStream(byteOut);
 
Index: src/java/org/apache/nutch/util/MimeUtil.java
===================================================================
--- src/java/org/apache/nutch/util/MimeUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/MimeUtil.java	(working copy)
@@ -35,12 +35,12 @@
  * @author mattmann
  * @since NUTCH-608
  * 
- * <p>
- * This is a facade class to insulate Nutch from its underlying Mime Type
- * substrate library, <a href="http://incubator.apache.org/tika/">Apache Tika</a>.
- * Any mime handling code should be placed in this utility class, and hidden
- * from the Nutch classes that rely on it.
- * </p>
+ *        <p>
+ *        This is a facade class to insulate Nutch from its underlying Mime Type
+ *        substrate library, <a href="http://incubator.apache.org/tika/">Apache
+ *        Tika</a>. Any mime handling code should be placed in this utility
+ *        class, and hidden from the Nutch classes that rely on it.
+ *        </p>
  */
 public final class MimeUtil {
 
@@ -53,7 +53,8 @@
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class
+      .getName());
 
   public MimeUtil(Configuration conf) {
     ObjectCache objectCache = ObjectCache.get(conf);
@@ -61,25 +62,26 @@
         .getName());
     if (mimeTypez == null) {
       try {
-          String customMimeTypeFile = conf.get("mime.types.file");
-          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
-              try {
-              mimeTypez = MimeTypesFactory.create(conf
-                      .getConfResourceAsInputStream(customMimeTypeFile));
-              }
-              catch (Exception e){
-                  LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
-              }
+        String customMimeTypeFile = conf.get("mime.types.file");
+        if (customMimeTypeFile != null
+            && customMimeTypeFile.equals("") == false) {
+          try {
+            mimeTypez = MimeTypesFactory.create(conf
+                .getConfResourceAsInputStream(customMimeTypeFile));
+          } catch (Exception e) {
+            LOG.error("Can't load mime.types.file : " + customMimeTypeFile
+                + " using Tika's default");
           }
-          if (mimeTypez==null)
-              mimeTypez = MimeTypes.getDefaultMimeTypes();
+        }
+        if (mimeTypez == null)
+          mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        LOG.error("Exception in MimeUtil "+e.getMessage());
+        LOG.error("Exception in MimeUtil " + e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
     }
-    
+
     this.mimeTypes = mimeTypez;
     this.mimeMagic = conf.getBoolean("mime.type.magic", true);
   }
@@ -115,17 +117,17 @@
   /**
    * A facade interface to trying all the possible mime type resolution
    * strategies available within Tika. First, the mime type provided in
-   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}.
-   * Then the cleaned mime type is looked up in the underlying Tika
-   * {@link MimeTypes} registry, by its cleaned name. If the {@link MimeType} is
-   * found, then that mime type is used, otherwise {@link URL} resolution is
-   * used to try and determine the mime type. If that means is unsuccessful, and
-   * if <code>mime.type.magic</code> is enabled in {@link NutchConfiguration},
-   * then mime type magic resolution is used to try and obtain a
+   * <code>typeName</code> is cleaned, with {@link #cleanMimeType(String)}. Then
+   * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
+   * registry, by its cleaned name. If the {@link MimeType} is found, then that
+   * mime type is used, otherwise {@link URL} resolution is used to try and
+   * determine the mime type. If that means is unsuccessful, and if
+   * <code>mime.type.magic</code> is enabled in {@link NutchConfiguration}, then
+   * mime type magic resolution is used to try and obtain a
    * better-than-the-default approximation of the {@link MimeType}.
    * 
    * @param typeName
-   *          The original mime type, returned from a {@link ProtocolOutput}.
+   *          The original mime type, returned from a {@link ProtocolOutput}.
    * @param url
    *          The given {@link URL}, that Nutch was trying to crawl.
    * @param data
@@ -138,8 +140,7 @@
 
     try {
       cleanedMimeType = MimeUtil.cleanMimeType(typeName) != null ? this.mimeTypes
-          .forName(MimeUtil.cleanMimeType(typeName)).getName()
-          : null;
+          .forName(MimeUtil.cleanMimeType(typeName)).getName() : null;
     } catch (MimeTypeException mte) {
       // Seems to be a malformed mime type name...
     }
@@ -162,20 +163,25 @@
     }
 
     // if magic is enabled use mime magic to guess if the mime type returned
-    // from the magic guess is different than the one that's already set so far
-    // if it is, and it's not the default mime type, then go with the mime type
+    // from the magic guess is different than the one that's already set so
+    // far; if it is, and it's not the default mime type, then go with the
+    // mime type
     // returned by the magic
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
-      if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
-          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT)
-          && type != null && !type.getName().equals(magicType.getName())) {
-        // If magic enabled and the current mime type differs from that of the
+      if (magicType != null
+          && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
+          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT) && type != null
+          && !type.getName().equals(magicType.getName())) {
+        // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType
         type = magicType;
       }
 
-      // if type is STILL null after all the resolution strategies, go for the
+      // if type is STILL null after all the resolution strategies, go for the
       // default type
       if (type == null) {
         try {
@@ -195,8 +201,8 @@
    * @param url
    *          A string representation of the document {@link URL} to sense the
    *          {@link MimeType} for.
-   * @return An appropriate {@link MimeType}, identified from the given
-   *         Document url in string form.
+   * @return An appropriate {@link MimeType}, identified from the given Document
+   *         url in string form.
    */
   public MimeType getMimeType(String url) {
     return this.mimeTypes.getMimeType(url);
@@ -208,8 +214,8 @@
    * 
    * @param name
    *          The name of a valid {@link MimeType} in the Tika mime registry.
-   * @return The object representation of the {@link MimeType}, if it exists,
-   *         or null otherwise.
+   * @return The object representation of the {@link MimeType}, if it exists, or
+   *         null otherwise.
    */
   public MimeType forName(String name) {
     try {
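
A usage sketch for the MimeUtil facade above (the URL and type names are illustrative); it only exercises the two lookups whose signatures appear in this patch, getMimeType(String) and forName(String).

import org.apache.hadoop.conf.Configuration;

import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;

import org.apache.tika.mime.MimeType;

public class MimeUtilSketch {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();
    MimeUtil mimeUtil = new MimeUtil(conf);

    // URL-based resolution through the facade
    MimeType byUrl = mimeUtil.getMimeType("http://example.com/report.pdf");
    System.out.println(byUrl != null ? byUrl.getName() : "unknown");

    // registry lookup by name; null is returned for malformed/unknown names
    MimeType byName = mimeUtil.forName("text/html");
    System.out.println(byName != null ? byName.getName() : "unknown");
  }
}
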
Index: src/java/org/apache/nutch/util/TimingUtil.java
===================================================================
--- src/java/org/apache/nutch/util/TimingUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/TimingUtil.java	(working copy)
@@ -21,35 +21,39 @@
 
 public class TimingUtil {
 
-    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+  private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
 
-    /**
-     * Calculate the elapsed time between two times specified in milliseconds.
-     * @param start The start of the time period
-     * @param end The end of the time period
-     * @return a string of the form "XhYmZs" when the elapsed time is X hours, Y minutes and Z seconds or null if start > end.
-     */
-    public static String elapsedTime(long start, long end){
-        if (start > end) {
-            return null;
-        }
+  /**
+   * Calculate the elapsed time between two times specified in milliseconds.
+   * 
+   * @param start
+   *          The start of the time period
+   * @param end
+   *          The end of the time period
+   * @return a zero-padded string of the form "HH:MM:SS" for the elapsed hours,
+   *         minutes and seconds, or null if start > end.
+   */
+  public static String elapsedTime(long start, long end) {
+    if (start > end) {
+      return null;
+    }
 
-        long[] elapsedTime = new long[TIME_FACTOR.length];
+    long[] elapsedTime = new long[TIME_FACTOR.length];
 
-        for (int i = 0; i < TIME_FACTOR.length; i++) {
-            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
-            start += TIME_FACTOR[i] * elapsedTime[i];
-        }
+    for (int i = 0; i < TIME_FACTOR.length; i++) {
+      elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+      start += TIME_FACTOR[i] * elapsedTime[i];
+    }
 
-        NumberFormat nf = NumberFormat.getInstance();
-        nf.setMinimumIntegerDigits(2);
-        StringBuffer buf = new StringBuffer();
-        for (int i = 0; i < elapsedTime.length; i++) {
-            if (i > 0) {
-                buf.append(":");
-            }
-            buf.append(nf.format(elapsedTime[i]));
-        }
-        return buf.toString();
+    NumberFormat nf = NumberFormat.getInstance();
+    nf.setMinimumIntegerDigits(2);
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < elapsedTime.length; i++) {
+      if (i > 0) {
+        buf.append(":");
+      }
+      buf.append(nf.format(elapsedTime[i]));
     }
+    return buf.toString();
+  }
 }
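
A quick sketch of TimingUtil.elapsedTime() as reformatted above: the implementation joins zero-padded hours, minutes and seconds with ':' and returns null when start > end.

import org.apache.nutch.util.TimingUtil;

public class ElapsedTimeSketch {
  public static void main(String[] args) {
    long start = System.currentTimeMillis();
    // 1 hour, 2 minutes and 3 seconds later
    long end = start + (1 * 60 * 60 * 1000) + (2 * 60 * 1000) + (3 * 1000);

    System.out.println(TimingUtil.elapsedTime(start, end)); // 01:02:03
    System.out.println(TimingUtil.elapsedTime(end, start)); // null
  }
}
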
Index: src/java/org/apache/nutch/util/LockUtil.java
===================================================================
--- src/java/org/apache/nutch/util/LockUtil.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/LockUtil.java	(working copy)
@@ -28,22 +28,29 @@
  * @author Andrzej Bialecki
  */
 public class LockUtil {
-  
+
   /**
    * Create a lock file.
-   * @param fs filesystem
-   * @param lockFile name of the lock file
-   * @param accept if true, and the target file exists, consider it valid. If false
-   * and the target file exists, throw an IOException.
-   * @throws IOException if accept is false, and the target file already exists,
-   * or if it's a directory.
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          name of the lock file
+   * @param accept
+   *          if true, and the target file exists, consider it valid. If false
+   *          and the target file exists, throw an IOException.
+   * @throws IOException
+   *           if accept is false, and the target file already exists, or if
+   *           it's a directory.
    */
-  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException {
+  public static void createLockFile(FileSystem fs, Path lockFile, boolean accept)
+      throws IOException {
     if (fs.exists(lockFile)) {
-      if(!accept)
+      if (!accept)
         throw new IOException("lock file " + lockFile + " already exists.");
       if (fs.getFileStatus(lockFile).isDir())
-        throw new IOException("lock file " + lockFile + " already exists and is a directory.");
+        throw new IOException("lock file " + lockFile
+            + " already exists and is a directory.");
       // do nothing - the file already exists.
     } else {
       // make sure parents exist
@@ -55,16 +62,23 @@
   /**
    * Remove lock file. NOTE: applications enforce the semantics of this file -
    * this method simply removes any file with a given name.
-   * @param fs filesystem
-   * @param lockFile lock file name
+   * 
+   * @param fs
+   *          filesystem
+   * @param lockFile
+   *          lock file name
    * @return false, if the lock file doesn't exist. True, if it existed and was
-   * successfully removed.
-   * @throws IOException if lock file exists but it is a directory.
+   *         successfully removed.
+   * @throws IOException
+   *           if lock file exists but it is a directory.
    */
-  public static boolean removeLockFile(FileSystem fs, Path lockFile) throws IOException {
-    if (!fs.exists(lockFile)) return false;
+  public static boolean removeLockFile(FileSystem fs, Path lockFile)
+      throws IOException {
+    if (!fs.exists(lockFile))
+      return false;
     if (fs.getFileStatus(lockFile).isDir())
-      throw new IOException("lock file " + lockFile + " exists but is a directory!");
+      throw new IOException("lock file " + lockFile
+          + " exists but is a directory!");
     return fs.delete(lockFile, false);
   }
 }
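
A sketch of the intended call pattern for LockUtil (the lock path is illustrative); with accept = false the call fails fast when another process already holds the lock.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.nutch.util.LockUtil;

public class LockSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path lock = new Path("crawl/crawldb/.locked");

    // throws IOException if the lock already exists (accept = false)
    LockUtil.createLockFile(fs, lock, false);
    try {
      // ... do the work guarded by the lock ...
    } finally {
      // false means the lock file was already gone
      boolean removed = LockUtil.removeLockFile(fs, lock);
      System.out.println("lock removed: " + removed);
    }
  }
}
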
Index: src/java/org/apache/nutch/util/domain/DomainStatistics.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainStatistics.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainStatistics.java	(working copy)
@@ -48,37 +48,40 @@
 import org.apache.nutch.util.URLUtil;
 
 /**
- * Extracts some very basic statistics about domains from the crawldb 
+ * Extracts some very basic statistics about domains from the crawldb
  */
-public class DomainStatistics
-extends MapReduceBase
-implements Tool, Mapper<Text, CrawlDatum, Text, LongWritable>,
-           Reducer<Text, LongWritable, LongWritable, Text> {
+public class DomainStatistics extends MapReduceBase implements Tool,
+    Mapper<Text, CrawlDatum, Text, LongWritable>,
+    Reducer<Text, LongWritable, LongWritable, Text> {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainStatistics.class);
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainStatistics.class);
+
   private static final Text FETCHED_TEXT = new Text("FETCHED");
   private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");
-  
-  public static enum MyCounter {FETCHED, NOT_FETCHED, EMPTY_RESULT};
-  
+
+  public static enum MyCounter {
+    FETCHED, NOT_FETCHED, EMPTY_RESULT
+  };
+
   private static final int MODE_HOST = 1;
   private static final int MODE_DOMAIN = 2;
   private static final int MODE_SUFFIX = 3;
-  
+
   private int mode = 0;
-  
+
   private Configuration conf;
-  
+
   public int run(String[] args) throws IOException {
     if (args.length < 3) {
-      System.out.println("usage: DomainStatistics inputDirs outDir host|domain|suffix [numOfReducer]");
+      System.out
+          .println("usage: DomainStatistics inputDirs outDir host|domain|suffix [numOfReducer]");
       return 1;
     }
     String inputDir = args[0];
     String outputDir = args[1];
     int numOfReducers = 1;
-    
+
     if (args.length > 3) {
       numOfReducers = Integer.parseInt(args[3]);
     }
@@ -91,14 +94,14 @@
     job.setJobName("Domain statistics");
 
     int mode = 0;
-    if(args[2].equals("host"))
+    if (args[2].equals("host"))
       mode = MODE_HOST;
-    else if(args[2].equals("domain"))
+    else if (args[2].equals("domain"))
       mode = MODE_DOMAIN;
-    else if(args[2].equals("suffix"))
+    else if (args[2].equals("suffix"))
       mode = MODE_SUFFIX;
     job.setInt("domain.statistics.mode", mode);
-    
+
     String[] inputDirsSpecs = inputDir.split(",");
     for (int i = 0; i < inputDirsSpecs.length; i++) {
       FileInputFormat.addInputPath(job, new Path(inputDirsSpecs[i]));
@@ -115,11 +118,12 @@
     job.setReducerClass(DomainStatistics.class);
     job.setCombinerClass(DomainStatisticsCombiner.class);
     job.setNumReduceTasks(numOfReducers);
-    
+
     JobClient.runJob(job);
-    
+
     long end = System.currentTimeMillis();
-    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
     return 0;
   }
 
@@ -128,7 +132,6 @@
     super.configure(job);
     mode = job.getInt("domain.statistics.mode", MODE_DOMAIN);
   }
-  
 
   public Configuration getConf() {
     return conf;
@@ -140,35 +143,35 @@
 
   public void map(Text urlText, CrawlDatum datum,
       OutputCollector<Text, LongWritable> output, Reporter reporter)
-  throws IOException {
-    
-    if(datum.getStatus() == CrawlDatum.STATUS_DB_FETCHED 
+      throws IOException {
+
+    if (datum.getStatus() == CrawlDatum.STATUS_DB_FETCHED
         || datum.getStatus() == CrawlDatum.STATUS_FETCH_SUCCESS) {
       try {
         URL url = new URL(urlText.toString());
         String out = null;
         switch (mode) {
-          case MODE_HOST:
-            out = url.getHost();
-            break;
-          case MODE_DOMAIN:
-            out = URLUtil.getDomainName(url);
-            break;
-          case MODE_SUFFIX:
-            out = URLUtil.getDomainSuffix(url).getDomain();
-            break;
+        case MODE_HOST:
+          out = url.getHost();
+          break;
+        case MODE_DOMAIN:
+          out = URLUtil.getDomainName(url);
+          break;
+        case MODE_SUFFIX:
+          out = URLUtil.getDomainSuffix(url).getDomain();
+          break;
         }
-        if(out.trim().equals("")) {
+        if (out.trim().equals("")) {
           LOG.info("url : " + url);
           reporter.incrCounter(MyCounter.EMPTY_RESULT, 1);
         }
-        
+
         output.collect(new Text(out), new LongWritable(1));
-      } catch (Exception ex) { }
+      } catch (Exception ex) {
+      }
       reporter.incrCounter(MyCounter.FETCHED, 1);
       output.collect(FETCHED_TEXT, new LongWritable(1));
-    }
-    else {
+    } else {
       reporter.incrCounter(MyCounter.NOT_FETCHED, 1);
       output.collect(NOT_FETCHED_TEXT, new LongWritable(1));
     }
@@ -176,31 +179,30 @@
 
   public void reduce(Text key, Iterator<LongWritable> values,
       OutputCollector<LongWritable, Text> output, Reporter reporter)
-  throws IOException {
-    
+      throws IOException {
+
     long total = 0;
-    
-    while(values.hasNext()) {
+
+    while (values.hasNext()) {
       LongWritable val = values.next();
       total += val.get();
     }
-    //invert output 
+    // invert output
     output.collect(new LongWritable(total), key);
   }
-    
-  
-  public static class DomainStatisticsCombiner extends MapReduceBase
-  implements Reducer<Text, LongWritable, Text, LongWritable> {
 
+  public static class DomainStatisticsCombiner extends MapReduceBase implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
     public void reduce(Text key, Iterator<LongWritable> values,
         OutputCollector<Text, LongWritable> output, Reporter reporter)
-    throws IOException {
+        throws IOException {
       long total = 0;
-      
-      while(values.hasNext()) {
+
+      while (values.hasNext()) {
         LongWritable val = values.next();
         total += val.get();
-      } 
+      }
       output.collect(key, new LongWritable(total));
     }
 
@@ -209,5 +211,5 @@
   public static void main(String[] args) throws Exception {
     ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(), args);
   }
-  
+
 }
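
For context, the job above is normally launched through ToolRunner with the arguments named in its usage string; a minimal driver sketch follows (the input and output paths are illustrative).

import org.apache.hadoop.util.ToolRunner;

import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.domain.DomainStatistics;

public class DomainStatisticsDriverSketch {
  public static void main(String[] args) throws Exception {
    // inputDirs outDir host|domain|suffix [numOfReducer]
    String[] jobArgs = { "crawl/crawldb/current", "stats-out", "domain", "1" };
    int exitCode = ToolRunner.run(NutchConfiguration.create(),
        new DomainStatistics(), jobArgs);
    System.exit(exitCode);
  }
}
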
Index: src/java/org/apache/nutch/util/domain/TopLevelDomain.java
===================================================================
--- src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/TopLevelDomain.java	(working copy)
@@ -18,41 +18,47 @@
 package org.apache.nutch.util.domain;
 
 /**
- * (From wikipedia) A top-level domain (TLD) is the last part of an 
- * Internet domain name; that is, the letters which follow the final 
- * dot of any domain name. For example, in the domain name 
- * <code>www.website.com</code>, the top-level domain is <code>com</code>.
+ * (From wikipedia) A top-level domain (TLD) is the last part of an Internet
+ * domain name; that is, the letters which follow the final dot of any domain
+ * name. For example, in the domain name <code>www.website.com</code>, the
+ * top-level domain is <code>com</code>.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see http://www.iana.org/
  * @see http://en.wikipedia.org/wiki/Top-level_domain
  */
 public class TopLevelDomain extends DomainSuffix {
 
-  public enum Type { INFRASTRUCTURE, GENERIC, COUNTRY };
-  
+  public enum Type {
+    INFRASTRUCTURE, GENERIC, COUNTRY
+  };
+
   private Type type;
   private String countryName = null;
-  
-  public TopLevelDomain(String domain, Type type, Status status, float boost){
+
+  public TopLevelDomain(String domain, Type type, Status status, float boost) {
     super(domain, status, boost);
     this.type = type;
   }
 
-  public TopLevelDomain(String domain, Status status, float boost, String countryName){
+  public TopLevelDomain(String domain, Status status, float boost,
+      String countryName) {
     super(domain, status, boost);
     this.type = Type.COUNTRY;
     this.countryName = countryName;
   }
-  
+
   public Type getType() {
     return type;
   }
 
-  /** Returns the country name if TLD is Country Code TLD
+  /**
+   * Returns the country name if TLD is Country Code TLD
+   * 
    * @return country name or null
-   */ 
-  public String getCountryName(){
+   */
+  public String getCountryName() {
     return countryName;
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffixes.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixes.java	(working copy)
@@ -25,57 +25,62 @@
 import org.apache.hadoop.util.StringUtils;
 
 /**
- * Storage class for <code>DomainSuffix</code> objects 
- * Note: this class is singleton
+ * Storage class for <code>DomainSuffix</code> objects. Note: this class is a
+ * singleton.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class DomainSuffixes {
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixes.class);
-  
-  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>(); 
-  
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixes.class);
+
+  private HashMap<String, DomainSuffix> domains = new HashMap<String, DomainSuffix>();
+
   private static DomainSuffixes instance;
-  
+
   /** private ctor */
   private DomainSuffixes() {
     String file = "domain-suffixes.xml";
-    InputStream input = this.getClass().getClassLoader().getResourceAsStream(file);
+    InputStream input = this.getClass().getClassLoader()
+        .getResourceAsStream(file);
     try {
       new DomainSuffixesReader().read(this, input);
-    }
-    catch (Exception ex) {
+    } catch (Exception ex) {
       LOG.warn(StringUtils.stringifyException(ex));
     }
   }
-  
+
   /**
    * Singleton instance, lazy instantination
+   * 
    * @return
    */
   public static DomainSuffixes getInstance() {
-    if(instance == null) {
+    if (instance == null) {
       instance = new DomainSuffixes();
     }
     return instance;
   }
-  
+
   void addDomainSuffix(DomainSuffix tld) {
     domains.put(tld.getDomain(), tld);
   }
 
   /** return whether the extension is a registered domain entry */
   public boolean isDomainSuffix(String extension) {
-    return domains.containsKey(extension); 
+    return domains.containsKey(extension);
   }
-    
+
   /**
-   * Return the {@link DomainSuffix} object for the extension, if 
-   * extension is a top level domain returned object will be an 
-   * instance of {@link TopLevelDomain}
-   * @param extension of the domain
+   * Return the {@link DomainSuffix} object for the extension; if the
+   * extension is a top level domain, the returned object will be an instance
+   * of {@link TopLevelDomain}.
+   * 
+   * @param extension
+   *          of the domain
    */
   public DomainSuffix get(String extension) {
     return domains.get(extension);
   }
-  
+
 }
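
A lookup sketch for the DomainSuffixes singleton above; whether a given suffix is registered depends on the bundled domain-suffixes.xml, so the results shown in the comments are only what is typically expected.

import org.apache.nutch.util.domain.DomainSuffix;
import org.apache.nutch.util.domain.DomainSuffixes;

public class SuffixLookupSketch {
  public static void main(String[] args) {
    DomainSuffixes suffixes = DomainSuffixes.getInstance();

    // membership tests against the registry read from domain-suffixes.xml
    System.out.println(suffixes.isDomainSuffix("co.uk")); // typically true
    System.out.println(suffixes.isDomainSuffix("example")); // typically false

    // for a plain TLD the returned object is a TopLevelDomain instance
    DomainSuffix com = suffixes.get("com");
    if (com != null) {
      System.out.println(com.getDomain() + " boost=" + com.getBoost());
    }
  }
}
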
Index: src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java	(working copy)
@@ -36,16 +36,17 @@
 import org.xml.sax.SAXException;
 
 /**
- * For parsing xml files containing domain suffix definitions.
- * Parsed xml files should validate against 
- * <code>domain-suffixes.xsd</code>  
+ * For parsing xml files containing domain suffix definitions. Parsed xml files
+ * should validate against <code>domain-suffixes.xsd</code>
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 class DomainSuffixesReader {
 
-  private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixesReader.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(DomainSuffixesReader.class);
 
-  void read(DomainSuffixes tldEntries, InputStream input) throws IOException{
+  void read(DomainSuffixes tldEntries, InputStream input) throws IOException {
     try {
 
       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
@@ -54,28 +55,29 @@
       Document document = builder.parse(new InputSource(input));
 
       Element root = document.getDocumentElement();
-      
-      if(root != null && root.getTagName().equals("domains")) {
-        
-        Element tlds = (Element)root.getElementsByTagName("tlds").item(0);
-        Element suffixes = (Element)root.getElementsByTagName("suffixes").item(0);
-        
-        //read tlds
-        readITLDs(tldEntries, (Element)tlds.getElementsByTagName("itlds").item(0));
-        readGTLDs(tldEntries, (Element)tlds.getElementsByTagName("gtlds").item(0));
-        readCCTLDs(tldEntries, (Element)tlds.getElementsByTagName("cctlds").item(0));
-        
+
+      if (root != null && root.getTagName().equals("domains")) {
+
+        Element tlds = (Element) root.getElementsByTagName("tlds").item(0);
+        Element suffixes = (Element) root.getElementsByTagName("suffixes")
+            .item(0);
+
+        // read tlds
+        readITLDs(tldEntries, (Element) tlds.getElementsByTagName("itlds")
+            .item(0));
+        readGTLDs(tldEntries, (Element) tlds.getElementsByTagName("gtlds")
+            .item(0));
+        readCCTLDs(tldEntries, (Element) tlds.getElementsByTagName("cctlds")
+            .item(0));
+
         readSuffixes(tldEntries, suffixes);
-      }
-      else {
+      } else {
         throw new IOException("xml file is not valid");
       }
-    }
-    catch (ParserConfigurationException ex) {
+    } catch (ParserConfigurationException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
-    }
-    catch (SAXException ex) {
+    } catch (SAXException ex) {
       LOG.warn(StringUtils.stringifyException(ex));
       throw new IOException(ex.getMessage());
     }
@@ -83,22 +85,24 @@
 
   void readITLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.INFRASTRUCTURE));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.INFRASTRUCTURE));
     }
   }
-    
+
   void readGTLDs(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readGTLD((Element)children.item(i), Type.GENERIC));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readGTLD((Element) children.item(i),
+          Type.GENERIC));
     }
   }
 
   void readCCTLDs(DomainSuffixes tldEntries, Element el) throws IOException {
     NodeList children = el.getElementsByTagName("tld");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readCCTLD((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readCCTLD((Element) children.item(i)));
     }
   }
 
@@ -113,39 +117,40 @@
     String domain = el.getAttribute("domain");
     Status status = readStatus(el);
     float boost = readBoost(el);
-    String countryName = readCountryName(el); 
-    return new TopLevelDomain(domain, status, boost, countryName);  
+    String countryName = readCountryName(el);
+    return new TopLevelDomain(domain, status, boost, countryName);
   }
-  
+
   /** read optional field status */
   Status readStatus(Element el) {
     NodeList list = el.getElementsByTagName("status");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_STATUS;
     return Status.valueOf(list.item(0).getFirstChild().getNodeValue());
   }
-  
+
   /** read optional field boost */
   float readBoost(Element el) {
     NodeList list = el.getElementsByTagName("boost");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       return DomainSuffix.DEFAULT_BOOST;
     return Float.parseFloat(list.item(0).getFirstChild().getNodeValue());
   }
-  
-  /** read field countryname 
-    */
+
+  /**
+   * read field countryname
+   */
   String readCountryName(Element el) throws IOException {
     NodeList list = el.getElementsByTagName("country");
-    if(list == null || list.getLength() == 0)
+    if (list == null || list.getLength() == 0)
       throw new IOException("Country name should be given");
     return list.item(0).getNodeValue();
   }
-  
+
   void readSuffixes(DomainSuffixes tldEntries, Element el) {
     NodeList children = el.getElementsByTagName("suffix");
-    for(int i=0;i<children.getLength();i++) {
-      tldEntries.addDomainSuffix(readSuffix((Element)children.item(i)));
+    for (int i = 0; i < children.getLength(); i++) {
+      tldEntries.addDomainSuffix(readSuffix((Element) children.item(i)));
     }
   }
 
@@ -155,5 +160,5 @@
     float boost = readBoost(el);
     return new DomainSuffix(domain, status, boost);
   }
-  
+
 }
Index: src/java/org/apache/nutch/util/domain/DomainSuffix.java
===================================================================
--- src/java/org/apache/nutch/util/domain/DomainSuffix.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/domain/DomainSuffix.java	(working copy)
@@ -18,17 +18,18 @@
 package org.apache.nutch.util.domain;
 
 /**
- * This class represents the last part of the host name, 
- * which is operated by authoritives, not individuals. This information 
- * is needed to find the domain name of a host. The domain name of a host
- * is defined to be the last part before the domain suffix, w/o subdomain 
- * names.  As an example the domain name of <br><code> http://lucene.apache.org/ 
- * </code><br> is <code> apache.org</code>   
- * <br>
- * This class holds three fields,  
- * <strong>domain</strong> field represents the suffix (such as "co.uk")
- * <strong>boost</strong> is a float for boosting score of url's with this suffix
- * <strong>status</strong> field represents domain's status
+ * This class represents the last part of the host name, which is operated by
+ * authorities, not individuals. This information is needed to find the domain
+ * name of a host. The domain name of a host is defined to be the last part
+ * before the domain suffix, w/o subdomain names. As an example the domain name
+ * of <br>
+ * <code> http://lucene.apache.org/ 
+ * </code><br>
+ * is <code> apache.org</code> <br>
+ * This class holds three fields: the <strong>domain</strong> field represents
+ * the suffix (such as "co.uk"), <strong>boost</strong> is a float for boosting
+ * the score of URLs with this suffix, and the <strong>status</strong> field
+ * represents the domain's status.
  * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  * @see TopLevelDomain
@@ -37,10 +38,10 @@
 public class DomainSuffix {
 
   /**
-   * Enumeration of the status of the tld. Please see domain-suffixes.xml. 
+   * Enumeration of the status of the tld. Please see domain-suffixes.xml.
    */
-  public enum Status { INFRASTRUCTURE, SPONSORED, UNSPONSORED
-    , STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
+  public enum Status {
+    INFRASTRUCTURE, SPONSORED, UNSPONSORED, STARTUP, PROPOSED, DELETED, PSEUDO_DOMAIN, DEPRECATED, IN_USE, NOT_IN_USE, REJECTED
   };
 
   private String domain;
@@ -49,7 +50,7 @@
 
   public static final float DEFAULT_BOOST = 1.0f;
   public static final Status DEFAULT_STATUS = Status.IN_USE;
-  
+
   public DomainSuffix(String domain, Status status, float boost) {
     this.domain = domain;
     this.status = status;
@@ -59,7 +60,7 @@
   public DomainSuffix(String domain) {
     this(domain, DEFAULT_STATUS, DEFAULT_BOOST);
   }
-  
+
   public String getDomain() {
     return domain;
   }
@@ -71,7 +72,7 @@
   public float getBoost() {
     return boost;
   }
-  
+
   @Override
   public String toString() {
     return domain;
Index: src/java/org/apache/nutch/util/GenericWritableConfigurable.java
===================================================================
--- src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/GenericWritableConfigurable.java	(working copy)
@@ -24,12 +24,15 @@
 import org.apache.hadoop.io.GenericWritable;
 import org.apache.hadoop.io.Writable;
 
-/** A generic Writable wrapper that can inject Configuration to {@link Configurable}s */ 
-public abstract class GenericWritableConfigurable extends GenericWritable 
-                                                  implements Configurable {
+/**
+ * A generic Writable wrapper that can inject Configuration to
+ * {@link Configurable}s
+ */
+public abstract class GenericWritableConfigurable extends GenericWritable
+    implements Configurable {
 
   private Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
@@ -37,7 +40,7 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  
+
   @Override
   public void readFields(DataInput in) throws IOException {
     byte type = in.readByte();
@@ -50,8 +53,8 @@
     }
     Writable w = get();
     if (w instanceof Configurable)
-      ((Configurable)w).setConf(conf);
+      ((Configurable) w).setConf(conf);
     w.readFields(in);
   }
-  
+
 }
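
A hypothetical subclass sketch for GenericWritableConfigurable: as with Hadoop's GenericWritable, a subclass only declares the concrete Writable types it may wrap, and the readFields() override above then restores the wrapped value and injects the Configuration when that value is Configurable. The wrapped types and class name chosen here are illustrative.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.util.GenericWritableConfigurable;

public class ExampleWritable extends GenericWritableConfigurable {

  // the type byte written before each instance indexes into this array
  @Override
  @SuppressWarnings("unchecked")
  protected Class<? extends Writable>[] getTypes() {
    return new Class[] { Text.class, CrawlDatum.class };
  }
}
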
Index: src/java/org/apache/nutch/util/PrefixStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/PrefixStringMatcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/PrefixStringMatcher.java	(working copy)
@@ -21,46 +21,47 @@
 import java.util.Iterator;
 
 /**
- * A class for efficiently matching <code>String</code>s against a set
- * of prefixes.
+ * A class for efficiently matching <code>String</code>s against a set of
+ * prefixes.
  */
 public class PrefixStringMatcher extends TrieStringMatcher {
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied array.
-   * Zero-length <code>Strings</code> are ignored.
+   * <code>String</code>s with any prefix in the supplied array. Zero-length
+   * <code>Strings</code> are ignored.
    */
   public PrefixStringMatcher(String[] prefixes) {
     super();
-    for (int i= 0; i < prefixes.length; i++)
+    for (int i = 0; i < prefixes.length; i++)
       addPatternForward(prefixes[i]);
   }
 
   /**
    * Creates a new <code>PrefixStringMatcher</code> which will match
-   * <code>String</code>s with any prefix in the supplied    
+   * <code>String</code>s with any prefix in the supplied
    * <code>Collection</code>.
-   *
-   * @throws ClassCastException if any <code>Object</code>s in the
-   * collection are not <code>String</code>s
+   * 
+   * @throws ClassCastException
+   *           if any <code>Object</code>s in the collection are not
+   *           <code>String</code>s
    */
   public PrefixStringMatcher(Collection prefixes) {
     super();
-    Iterator iter= prefixes.iterator();
+    Iterator iter = prefixes.iterator();
     while (iter.hasNext())
-      addPatternForward((String)iter.next());
+      addPatternForward((String) iter.next());
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * prefix in the trie
+   * Returns true if the given <code>String</code> is matched by a prefix in the
+   * trie
    */
   public boolean matches(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return false;
       if (node.isTerminal())
         return true;
@@ -73,13 +74,13 @@
    * or <code>null<code> if no match exists.
    */
   public String shortestMatch(String input) {
-    TrieNode node= root;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         return null;
       if (node.isTerminal())
-        return input.substring(0, i+1);
+        return input.substring(0, i + 1);
     }
     return null;
   }
@@ -89,29 +90,26 @@
    * or <code>null<code> if no match exists.
    */
   public String longestMatch(String input) {
-    TrieNode node= root;
-    String result= null;
-    for (int i= 0; i < input.length(); i++) {
-      node= node.getChild(input.charAt(i));
-      if (node == null) 
+    TrieNode node = root;
+    String result = null;
+    for (int i = 0; i < input.length(); i++) {
+      node = node.getChild(input.charAt(i));
+      if (node == null)
         break;
       if (node.isTerminal())
-        result= input.substring(0, i+1);
+        result = input.substring(0, i + 1);
     }
     return result;
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher= 
-      new PrefixStringMatcher( 
-        new String[] 
-        {"abcd", "abc", "aac", "baz", "foo", "foobar"} );
+    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
+        "abcd", "abc", "aac", "baz", "foo", "foobar" });
 
-    String[] tests= {"a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
-                     "aaccca", "abaz", "baz", "bazooka", "fo", "foobar",
-                     "kite", };
+    String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
+        "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
 
-    for (int i= 0; i < tests.length; i++) {
+    for (int i = 0; i < tests.length; i++) {
       System.out.println("testing: " + tests[i]);
       System.out.println("   matches: " + matcher.matches(tests[i]));
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
Index: src/java/org/apache/nutch/util/FSUtils.java
===================================================================
--- src/java/org/apache/nutch/util/FSUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/FSUtils.java	(working copy)
@@ -33,16 +33,20 @@
    * path. If removeOld is set to false then the old path will be set to the
    * name current.old.
    * 
-   * @param fs The FileSystem.
-   * @param current The end path, the one being replaced.
-   * @param replacement The path to replace with.
-   * @param removeOld True if we are removing the current path.
+   * @param fs
+   *          The FileSystem.
+   * @param current
+   *          The end path, the one being replaced.
+   * @param replacement
+   *          The path to replace with.
+   * @param removeOld
+   *          True if we are removing the current path.
    * 
-   * @throws IOException If an error occurs during replacement.
+   * @throws IOException
+   *           If an error occurs during replacement.
    */
   public static void replace(FileSystem fs, Path current, Path replacement,
-    boolean removeOld)
-    throws IOException {
+      boolean removeOld) throws IOException {
 
     // rename any current path to old
     Path old = new Path(current + ".old");
@@ -60,12 +64,14 @@
   /**
    * Closes a group of SequenceFile readers.
    * 
-   * @param readers The SequenceFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The SequenceFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
   public static void closeReaders(SequenceFile.Reader[] readers)
-    throws IOException {
-    
+      throws IOException {
+
     // loop through the readers, closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
@@ -80,12 +86,13 @@
   /**
    * Closes a group of MapFile readers.
    * 
-   * @param readers The MapFile readers to close.
-   * @throws IOException If an error occurs while closing a reader.
+   * @param readers
+   *          The MapFile readers to close.
+   * @throws IOException
+   *           If an error occurs while closing a reader.
    */
-  public static void closeReaders(MapFile.Reader[] readers)
-    throws IOException {
-    
+  public static void closeReaders(MapFile.Reader[] readers) throws IOException {
+
     // loop through the readers closing one by one
     if (readers != null) {
       for (int i = 0; i < readers.length; i++) {
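
A sketch of FSUtils.replace() as documented above (paths illustrative): with removeOld = false the previous data is kept under the ".old" suffix instead of being deleted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.nutch.util.FSUtils;

public class ReplaceSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());

    Path current = new Path("crawl/crawldb/current");
    Path replacement = new Path("crawl/crawldb/new");

    // keeps the old data as crawl/crawldb/current.old
    FSUtils.replace(fs, current, replacement, false);
  }
}
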
Index: src/java/org/apache/nutch/util/GZIPUtils.java
===================================================================
--- src/java/org/apache/nutch/util/GZIPUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/GZIPUtils.java	(working copy)
@@ -28,19 +28,18 @@
 import org.slf4j.LoggerFactory;
 
 /**
- *  A collection of utility methods for working on GZIPed data.
+ * A collection of utility methods for working on GZIPed data.
  */
 public class GZIPUtils {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(GZIPUtils.class);
-  private static final int EXPECTED_COMPRESSION_RATIO= 5;
-  private static final int BUF_SIZE= 4096;
+  private static final int EXPECTED_COMPRESSION_RATIO = 5;
+  private static final int BUF_SIZE = 4096;
 
   /**
-   * Returns an gunzipped copy of the input array.  If the gzipped
-   * input has been truncated or corrupted, a best-effort attempt is
-   * made to unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * Returns a gunzipped copy of the input array. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in) {
     return unzipBestEffort(in, Integer.MAX_VALUE);
@@ -48,33 +47,32 @@
 
   /**
    * Returns an gunzipped copy of the input array, truncated to
-   * <code>sizeLimit</code> bytes, if necessary.  If the gzipped input
-   * has been truncated or corrupted, a best-effort attempt is made to
-   * unzip as much as possible.  If no data can be extracted
-   * <code>null</code> is returned.
+   * <code>sizeLimit</code> bytes, if necessary. If the gzipped input has been
+   * truncated or corrupted, a best-effort attempt is made to unzip as much as
+   * possible. If no data can be extracted, <code>null</code> is returned.
    */
   public static final byte[] unzipBestEffort(byte[] in, int sizeLimit) {
     try {
-      // decompress using GZIPInputStream 
-      ByteArrayOutputStream outStream = 
-        new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+      // decompress using GZIPInputStream
+      ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+          EXPECTED_COMPRESSION_RATIO * in.length);
 
-      GZIPInputStream inStream = 
-        new GZIPInputStream ( new ByteArrayInputStream(in) );
+      GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(
+          in));
 
       byte[] buf = new byte[BUF_SIZE];
       int written = 0;
       while (true) {
         try {
           int size = inStream.read(buf);
-          if (size <= 0) 
+          if (size <= 0)
             break;
           if ((written + size) > sizeLimit) {
             outStream.write(buf, 0, sizeLimit - written);
             break;
           }
           outStream.write(buf, 0, size);
-          written+= size;
+          written += size;
         } catch (Exception e) {
           break;
         }
@@ -91,23 +89,23 @@
     }
   }
 
-
   /**
+   * Returns a gunzipped copy of the input array.
-   * @throws IOException if the input cannot be properly decompressed
+   * Returns an gunzipped copy of the input array.
+   * 
+   * @throws IOException
+   *           if the input cannot be properly decompressed
    */
   public static final byte[] unzip(byte[] in) throws IOException {
-    // decompress using GZIPInputStream 
-    ByteArrayOutputStream outStream = 
-      new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+    // decompress using GZIPInputStream
+    ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+        EXPECTED_COMPRESSION_RATIO * in.length);
 
-    GZIPInputStream inStream = 
-      new GZIPInputStream ( new ByteArrayInputStream(in) );
+    GZIPInputStream inStream = new GZIPInputStream(new ByteArrayInputStream(in));
 
     byte[] buf = new byte[BUF_SIZE];
     while (true) {
       int size = inStream.read(buf);
-      if (size <= 0) 
+      if (size <= 0)
         break;
       outStream.write(buf, 0, size);
     }
@@ -121,11 +119,11 @@
    */
   public static final byte[] zip(byte[] in) {
     try {
-      // compress using GZIPOutputStream 
-      ByteArrayOutputStream byteOut= 
-        new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+      // compress using GZIPOutputStream
+      ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+          / EXPECTED_COMPRESSION_RATIO);
 
-      GZIPOutputStream outStream= new GZIPOutputStream(byteOut);
+      GZIPOutputStream outStream = new GZIPOutputStream(byteOut);
 
       try {
         outStream.write(in);
@@ -146,5 +144,5 @@
       return null;
     }
   }
-    
+
 }
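
A round-trip sketch for GZIPUtils (illustration only): zip() gzips the payload and unzipBestEffort() recovers as much as it can, so a clean round trip restores the original bytes.

import org.apache.nutch.util.GZIPUtils;

public class GzipRoundTripSketch {
  public static void main(String[] args) {
    byte[] original = "hello, gzipped world".getBytes();

    byte[] zipped = GZIPUtils.zip(original);
    byte[] unzipped = GZIPUtils.unzipBestEffort(zipped);

    System.out.println(new String(unzipped)); // hello, gzipped world
  }
}
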
Index: src/java/org/apache/nutch/util/ObjectCache.java
===================================================================
--- src/java/org/apache/nutch/util/ObjectCache.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/ObjectCache.java	(working copy)
@@ -24,35 +24,33 @@
 import org.apache.hadoop.conf.Configuration;
 
 public class ObjectCache {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(ObjectCache.class);
-  
-  private static final WeakHashMap<Configuration, ObjectCache> CACHE = 
-    new WeakHashMap<Configuration, ObjectCache>();
 
+  private static final WeakHashMap<Configuration, ObjectCache> CACHE = new WeakHashMap<Configuration, ObjectCache>();
+
   private final HashMap<String, Object> objectMap;
-  
+
   private ObjectCache() {
     objectMap = new HashMap<String, Object>();
   }
-  
+
   public static ObjectCache get(Configuration conf) {
     ObjectCache objectCache = CACHE.get(conf);
     if (objectCache == null) {
-      LOG.debug("No object cache found for conf=" + conf 
-                  + ", instantiating a new object cache");
+      LOG.debug("No object cache found for conf=" + conf
+          + ", instantiating a new object cache");
       objectCache = new ObjectCache();
       CACHE.put(conf, objectCache);
     }
     return objectCache;
   }
-  
+
   public Object getObject(String key) {
     return objectMap.get(key);
   }
-  
+
   public void setObject(String key, Object value) {
     objectMap.put(key, value);
   }
 }
-
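
A usage sketch for ObjectCache: there is one cache per Configuration instance (held in a weak map, so entries live only as long as the conf does); the key and value used here are illustrative.

import org.apache.hadoop.conf.Configuration;

import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.ObjectCache;

public class ObjectCacheSketch {
  public static void main(String[] args) {
    Configuration conf = NutchConfiguration.create();

    ObjectCache cache = ObjectCache.get(conf);
    cache.setObject("example.key", "some expensive-to-build object");

    // the same conf always maps to the same cache, so the value is found again
    System.out.println(ObjectCache.get(conf).getObject("example.key"));
  }
}
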
Index: src/java/org/apache/nutch/util/NodeWalker.java
===================================================================
--- src/java/org/apache/nutch/util/NodeWalker.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/NodeWalker.java	(working copy)
@@ -22,13 +22,17 @@
 import org.w3c.dom.NodeList;
 
 /**
- * <p>A utility class that allows the walking of any DOM tree using a stack 
- * instead of recursion.  As the node tree is walked the next node is popped
- * off of the stack and all of its children are automatically added to the 
- * stack to be called in tree order.</p>
+ * <p>
+ * A utility class that allows the walking of any DOM tree using a stack instead
+ * of recursion. As the node tree is walked the next node is popped off of the
+ * stack and all of its children are automatically added to the stack to be
+ * called in tree order.
+ * </p>
  * 
- * <p>Currently this class is not thread safe.  It is assumed that only one
- * thread will be accessing the <code>NodeWalker</code> at any given time.</p>
+ * <p>
+ * Currently this class is not thread safe. It is assumed that only one thread
+ * will be accessing the <code>NodeWalker</code> at any given time.
+ * </p>
  */
 public class NodeWalker {
 
@@ -36,7 +40,7 @@
   private Node currentNode;
   private NodeList currentChildren;
   private Stack<Node> nodes;
-  
+
   /**
    * Starts the <code>Node</code> tree from the root node.
    * 
@@ -47,62 +51,68 @@
     nodes = new Stack<Node>();
     nodes.add(rootNode);
   }
-  
+
   /**
-   * <p>Returns the next <code>Node</code> on the stack and pushes all of its
-   * children onto the stack, allowing us to walk the node tree without the
-   * use of recursion.  If there are no more nodes on the stack then null is
-   * returned.</p>
+   * <p>
+   * Returns the next <code>Node</code> on the stack and pushes all of its
+   * children onto the stack, allowing us to walk the node tree without the use
+   * of recursion. If there are no more nodes on the stack then null is
+   * returned.
+   * </p>
    * 
-   * @return Node The next <code>Node</code> on the stack or null if there
-   * isn't a next node.
+   * @return Node The next <code>Node</code> on the stack or null if there isn't
+   *         a next node.
    */
   public Node nextNode() {
-    
+
     // if no next node return null
     if (!hasNext()) {
       return null;
     }
-    
+
     // pop the next node off of the stack and push all of its children onto
     // the stack
     currentNode = nodes.pop();
     currentChildren = currentNode.getChildNodes();
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
+
     // put the children node on the stack in first to last order
     for (int i = childLen - 1; i >= 0; i--) {
       nodes.add(currentChildren.item(i));
     }
-    
+
     return currentNode;
   }
-  
+
   /**
-   * <p>Skips over and removes from the node stack the children of the last
-   * node.  When getting a next node from the walker, that node's children 
-   * are automatically added to the stack.  You can call this method to remove
-   * those children from the stack.</p>
+   * <p>
+   * Skips over and removes from the node stack the children of the last node.
+   * When getting a next node from the walker, that node's children are
+   * automatically added to the stack. You can call this method to remove those
+   * children from the stack.
+   * </p>
    * 
-   * <p>This is useful when you don't want to process deeper into the 
-   * current path of the node tree but you want to continue processing sibling
-   * nodes.</p>
-   *
+   * <p>
+   * This is useful when you don't want to process deeper into the current path
+   * of the node tree but you want to continue processing sibling nodes.
+   * </p>
+   * 
    */
   public void skipChildren() {
-    
+
     int childLen = (currentChildren != null) ? currentChildren.getLength() : 0;
-    
-    for (int i = 0 ; i < childLen ; i++) {
+
+    for (int i = 0; i < childLen; i++) {
       Node child = nodes.peek();
       if (child.equals(currentChildren.item(i))) {
         nodes.pop();
       }
     }
   }
-  
+
   /**
    * Returns true if there are more nodes on the current stack.
+   * 
    * @return
    */
   public boolean hasNext() {
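
A minimal usage sketch (not part of the patch) of the NodeWalker above: it walks a
small DOM, prints element names, and calls skipChildren() so that script subtrees
are not descended into. The demo class name, the sample markup and the import path
for NodeWalker are assumptions made only for illustration.

    import java.io.ByteArrayInputStream;
    import javax.xml.parsers.DocumentBuilderFactory;
    import org.apache.nutch.util.NodeWalker; // assumed location of the class above
    import org.w3c.dom.Document;
    import org.w3c.dom.Node;

    public class NodeWalkerDemo {
      public static void main(String[] args) throws Exception {
        String xml = "<html><body><p>text</p><script>skipped()</script><p>more</p></body></html>";
        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
            .parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));

        NodeWalker walker = new NodeWalker(doc.getDocumentElement());
        while (walker.hasNext()) {
          Node current = walker.nextNode();
          if (current.getNodeType() != Node.ELEMENT_NODE)
            continue;
          if ("script".equalsIgnoreCase(current.getNodeName())) {
            walker.skipChildren(); // keep walking siblings, but skip this subtree
            continue;
          }
          System.out.println(current.getNodeName());
        }
      }
    }
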
Index: src/java/org/apache/nutch/util/TrieStringMatcher.java
===================================================================
--- src/java/org/apache/nutch/util/TrieStringMatcher.java	(revision 1188252)
+++ src/java/org/apache/nutch/util/TrieStringMatcher.java	(working copy)
@@ -17,21 +17,19 @@
 
 package org.apache.nutch.util;
 
-
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.ListIterator;
 
 /**
- * TrieStringMatcher is a base class for simple tree-based string
- * matching.
- *
+ * TrieStringMatcher is a base class for simple tree-based string matching.
+ * 
  */
 public abstract class TrieStringMatcher {
   protected TrieNode root;
 
   protected TrieStringMatcher() {
-    this.root= new TrieNode('\000', false);
+    this.root = new TrieNode('\000', false);
   }
 
   /**
@@ -44,20 +42,19 @@
     protected boolean terminal;
 
     /**
-     * Creates a new TrieNode, which contains the given
-     * <code>nodeChar</code>.  If <code>isTerminal</code> is
-     * <code>true</code>, the new node is a <em>terminal</em> node in
-     * the trie.
-     */  
+     * Creates a new TrieNode, which contains the given <code>nodeChar</code>.
+     * If <code>isTerminal</code> is <code>true</code>, the new node is a
+     * <em>terminal</em> node in the trie.
+     */
     TrieNode(char nodeChar, boolean isTerminal) {
-      this.nodeChar= nodeChar;
-      this.terminal= isTerminal;
-      this.childrenList= new LinkedList<TrieNode>();
+      this.nodeChar = nodeChar;
+      this.terminal = isTerminal;
+      this.childrenList = new LinkedList<TrieNode>();
     }
 
     /**
-     * Returns <code>true</code> if this node is a <em>terminal</em>
-     * node in the trie.
+     * Returns <code>true</code> if this node is a <em>terminal</em> node in the
+     * trie.
      */
     boolean isTerminal() {
       return terminal;
@@ -65,67 +62,68 @@
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists, one will be is
-     * added.  If <em>isTerminal</em> is <code>true</code>, the node 
-     * will be a terminal node in the trie.
+     * <code>nextChar</code>. If no such node exists, one will be added. If
+     * <em>isTerminal</em> is <code>true</code>, the node will be a terminal
+     * node in the trie.
      */
     TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) {
       if (childrenList == null) {
-        childrenList= new LinkedList<TrieNode>();
+        childrenList = new LinkedList<TrieNode>();
         childrenList.addAll(Arrays.asList(children));
-        children= null;
+        children = null;
       }
 
       if (childrenList.size() == 0) {
-        TrieNode newNode= new TrieNode(nextChar, isTerminal);
+        TrieNode newNode = new TrieNode(nextChar, isTerminal);
         childrenList.add(newNode);
         return newNode;
       }
 
-      ListIterator<TrieNode> iter= childrenList.listIterator();
-      TrieNode node= iter.next();
-      while ( (node.nodeChar < nextChar) && iter.hasNext() ) 
-        node= iter.next();
-                        
+      ListIterator<TrieNode> iter = childrenList.listIterator();
+      TrieNode node = iter.next();
+      while ((node.nodeChar < nextChar) && iter.hasNext())
+        node = iter.next();
+
       if (node.nodeChar == nextChar) {
-        node.terminal= node.terminal | isTerminal;
+        node.terminal = node.terminal | isTerminal;
         return node;
       }
 
-      if (node.nodeChar > nextChar) 
+      if (node.nodeChar > nextChar)
         iter.previous();
 
-      TrieNode newNode= new TrieNode(nextChar, isTerminal);
+      TrieNode newNode = new TrieNode(nextChar, isTerminal);
       iter.add(newNode);
-      return newNode;                   
+      return newNode;
     }
 
     /**
      * Returns the child node of this node whose node-character is
-     * <code>nextChar</code>.  If no such node exists,
-     * <code>null</code> is returned.
+     * <code>nextChar</code>. If no such node exists, <code>null</code> is
+     * returned.
      */
     TrieNode getChild(char nextChar) {
       if (children == null) {
-        children= childrenList.toArray(new TrieNode[childrenList.size()]);
-        childrenList= null;
+        children = childrenList.toArray(new TrieNode[childrenList.size()]);
+        childrenList = null;
         Arrays.sort(children);
       }
 
-      int min= 0;
-      int max= children.length - 1;
-      int mid= 0;
+      int min = 0;
+      int max = children.length - 1;
+      int mid = 0;
       while (min < max) {
-        mid= (min + max) / 2;
-        if (children[mid].nodeChar == nextChar) 
+        mid = (min + max) / 2;
+        if (children[mid].nodeChar == nextChar)
           return children[mid];
         if (children[mid].nodeChar < nextChar)
-          min= mid + 1;
-        else // if (children[mid].nodeChar > nextChar)
-          max= mid - 1;
+          min = mid + 1;
+        else
+          // if (children[mid].nodeChar > nextChar)
+          max = mid - 1;
       }
 
-      if (min == max) 
+      if (min == max)
         if (children[min].nodeChar == nextChar)
           return children[min];
 
@@ -133,59 +131,57 @@
     }
 
     public int compareTo(TrieNode other) {
-      if (this.nodeChar < other.nodeChar) 
+      if (this.nodeChar < other.nodeChar)
         return -1;
-      if (this.nodeChar == other.nodeChar) 
+      if (this.nodeChar == other.nodeChar)
         return 0;
-//    if (this.nodeChar > other.nodeChar) 
+      // if (this.nodeChar > other.nodeChar)
       return 1;
     }
   }
 
   /**
    * Returns the next {@link TrieNode} visited, given that you are at
-   * <code>node</code>, and the the next character in the input is 
-   * the <code>idx</code>'th character of <code>s</code>.
+   * <code>node</code>, and the next character in the input is the
+   * <code>idx</code>'th character of <code>s</code>.
    */
   protected final TrieNode matchChar(TrieNode node, String s, int idx) {
     return node.getChild(s.charAt(idx));
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded and the last character is
-   * represented by a terminal node.  Zero-length <code>Strings</code>
-   * are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded and the last character is represented by a terminal node.
+   * Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternForward(String s) {
-    TrieNode node= root;
-    int stop= s.length() - 1;
+    TrieNode node = root;
+    int stop = s.length() - 1;
     int i;
     if (s.length() > 0) {
-      for (i= 0; i < stop; i++)
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(i), true);
+      for (i = 0; i < stop; i++)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(i), true);
     }
   }
 
   /**
-   * Adds any necessary nodes to the trie so that the given
-   * <code>String</code> can be decoded <em>in reverse</em> and the
-   * first character is represented by a terminal node.  Zero-length
-   * <code>Strings</code> are ignored.
+   * Adds any necessary nodes to the trie so that the given <code>String</code>
+   * can be decoded <em>in reverse</em> and the first character is represented
+   * by a terminal node. Zero-length <code>Strings</code> are ignored.
    */
   protected final void addPatternBackward(String s) {
-    TrieNode node= root;
+    TrieNode node = root;
     if (s.length() > 0) {
-      for (int i= s.length()-1; i > 0; i--) 
-        node= node.getChildAddIfNotPresent(s.charAt(i), false);
-      node= node.getChildAddIfNotPresent(s.charAt(0), true);
+      for (int i = s.length() - 1; i > 0; i--)
+        node = node.getChildAddIfNotPresent(s.charAt(i), false);
+      node = node.getChildAddIfNotPresent(s.charAt(0), true);
     }
   }
 
   /**
-   * Returns true if the given <code>String</code> is matched by a
-   * pattern in the trie
+   * Returns true if the given <code>String</code> is matched by a pattern in
+   * the trie
    */
   public abstract boolean matches(String input);
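
A hedged sketch (not part of the patch) of a concrete matcher built only on the
protected helpers shown above: addPatternForward() decodes each pattern front to
back into the trie, and matchChar() follows one child per input character. The
class below is illustrative and is not one of the project's real subclasses.

    package org.apache.nutch.util; // same package, so the package-private TrieNode members are visible

    /** Illustrative only: true if any added pattern is a prefix of the input. */
    public class DemoPrefixMatcher extends TrieStringMatcher {

      public DemoPrefixMatcher(String[] patterns) {
        for (String pattern : patterns)
          addPatternForward(pattern); // build the trie from the first character onward
      }

      public boolean matches(String input) {
        TrieNode node = root;
        for (int i = 0; i < input.length(); i++) {
          node = matchChar(node, input, i); // child for input.charAt(i), or null
          if (node == null)
            return false;                   // fell off the trie: no pattern applies
          if (node.isTerminal())
            return true;                    // a complete pattern ends here
        }
        return false;
      }
    }

A suffix-oriented matcher would do the same with addPatternBackward() and walk the
input from its last character toward the first.
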
 
Index: src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java	(working copy)
@@ -52,42 +52,44 @@
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 
-/** 
+/**
  * Utility class for deleting duplicate documents from a solr index.
- *
+ * 
  * The algorithm goes like follows:
  * 
  * Preparation:
  * <ol>
  * <li>Query the solr server for the number of documents (say, N)</li>
- * <li>Partition N among M map tasks. For example, if we have two map tasks
- * the first map task will deal with solr documents from 0 - (N / 2 - 1) and
- * the second will deal with documents from (N / 2) to (N - 1).</li>
+ * <li>Partition N among M map tasks. For example, if we have two map tasks the
+ * first map task will deal with solr documents from 0 - (N / 2 - 1) and the
+ * second will deal with documents from (N / 2) to (N - 1).</li>
  * </ol>
  * 
  * MapReduce:
  * <ul>
- * <li>Map: Identity map where keys are digests and values are {@link SolrRecord}
- * instances(which contain id, boost and timestamp)</li>
+ * <li>Map: Identity map where keys are digests and values are
+ * {@link SolrRecord} instances (which contain id, boost and timestamp)</li>
  * <li>Reduce: After map, {@link SolrRecord}s with the same digest will be
- * grouped together. Now, of these documents with the same digests, delete
- * all of them except the one with the highest score (boost field). If two
- * (or more) documents have the same score, then the document with the latest
- * timestamp is kept. Again, every other is deleted from solr index.
- * </li>
+ * grouped together. Now, of these documents with the same digests, delete all
+ * of them except the one with the highest score (boost field). If two (or more)
+ * documents have the same score, then the document with the latest timestamp is
+ * kept. Again, every other document is deleted from the Solr index.</li>
  * </ul>
  * 
- * Note that unlike {@link DeleteDuplicates} we assume that two documents in
- * a solr index will never have the same URL. So this class only deals with
- * documents with <b>different</b> URLs but the same digest. 
+ * Note that unlike {@link DeleteDuplicates} we assume that two documents in a
+ * solr index will never have the same URL. So this class only deals with
+ * documents with <b>different</b> URLs but the same digest.
  */
 public class SolrDeleteDuplicates
-implements Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>,
-Tool {
+    implements
+    Reducer<Text, SolrDeleteDuplicates.SolrRecord, Text, SolrDeleteDuplicates.SolrRecord>,
+    Tool {
 
-  public static final Logger LOG = LoggerFactory.getLogger(SolrDeleteDuplicates.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(SolrDeleteDuplicates.class);
 
-  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD + ":[* TO *]";
+  private static final String SOLR_GET_ALL_QUERY = SolrConstants.ID_FIELD
+      + ":[* TO *]";
 
   private static final int NUM_MAX_DELETE_REQUEST = 1000;
 
@@ -97,12 +99,13 @@
     private long tstamp;
     private String id;
 
-    public SolrRecord() { }
-    
+    public SolrRecord() {
+    }
+
     public SolrRecord(SolrRecord old) {
-	this.id = old.id;
-	this.boost = old.boost;
-	this.tstamp = old.tstamp;
+      this.id = old.id;
+      this.boost = old.boost;
+      this.tstamp = old.tstamp;
     }
 
     public SolrRecord(String id, float boost, long tstamp) {
@@ -124,10 +127,10 @@
     }
 
     public void readSolrDocument(SolrDocument doc) {
-      id = (String)doc.getFieldValue(SolrConstants.ID_FIELD);
-      boost = (Float)doc.getFieldValue(SolrConstants.BOOST_FIELD);
+      id = (String) doc.getFieldValue(SolrConstants.ID_FIELD);
+      boost = (Float) doc.getFieldValue(SolrConstants.BOOST_FIELD);
 
-      Date buffer = (Date)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
+      Date buffer = (Date) doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
       tstamp = buffer.getTime();
     }
 
@@ -141,7 +144,7 @@
       Text.writeString(out, id);
       out.writeFloat(boost);
       out.writeLong(tstamp);
-    } 
+    }
   }
 
   public static class SolrInputSplit implements InputSplit {
@@ -149,7 +152,8 @@
     private int docBegin;
     private int numDocs;
 
-    public SolrInputSplit() { }
+    public SolrInputSplit() {
+    }
 
     public SolrInputSplit(int docBegin, int numDocs) {
       this.docBegin = docBegin;
@@ -169,7 +173,7 @@
     }
 
     public String[] getLocations() throws IOException {
-      return new String[] {} ;
+      return new String[] {};
     }
 
     public void readFields(DataInput in) throws IOException {
@@ -186,7 +190,8 @@
   public static class SolrInputFormat implements InputFormat<Text, SolrRecord> {
 
     /** Return each index as a split. */
-    public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
+    public InputSplit[] getSplits(JobConf job, int numSplits)
+        throws IOException {
       SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
 
       final SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
@@ -200,32 +205,31 @@
         throw new IOException(e);
       }
 
-      int numResults = (int)response.getResults().getNumFound();
-      int numDocsPerSplit = (numResults / numSplits); 
+      int numResults = (int) response.getResults().getNumFound();
+      int numDocsPerSplit = (numResults / numSplits);
       int currentDoc = 0;
       SolrInputSplit[] splits = new SolrInputSplit[numSplits];
       for (int i = 0; i < numSplits - 1; i++) {
         splits[i] = new SolrInputSplit(currentDoc, numDocsPerSplit);
         currentDoc += numDocsPerSplit;
       }
-      splits[splits.length - 1] = new SolrInputSplit(currentDoc, numResults - currentDoc);
+      splits[splits.length - 1] = new SolrInputSplit(currentDoc, numResults
+          - currentDoc);
 
       return splits;
     }
 
-    public RecordReader<Text, SolrRecord> getRecordReader(final InputSplit split,
-        final JobConf job, 
-        Reporter reporter)
+    public RecordReader<Text, SolrRecord> getRecordReader(
+        final InputSplit split, final JobConf job, Reporter reporter)
         throws IOException {
 
       SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
       SolrInputSplit solrSplit = (SolrInputSplit) split;
       final int numDocs = solrSplit.getNumDocs();
-      
+
       SolrQuery solrQuery = new SolrQuery(SOLR_GET_ALL_QUERY);
       solrQuery.setFields(SolrConstants.ID_FIELD, SolrConstants.BOOST_FIELD,
-                          SolrConstants.TIMESTAMP_FIELD,
-                          SolrConstants.DIGEST_FIELD);
+          SolrConstants.TIMESTAMP_FIELD, SolrConstants.DIGEST_FIELD);
       solrQuery.setStart(solrSplit.getDocBegin());
       solrQuery.setRows(numDocs);
 
@@ -242,7 +246,8 @@
 
         private int currentDoc = 0;
 
-        public void close() throws IOException { }
+        public void close() throws IOException {
+        }
 
         public Text createKey() {
           return new Text();
@@ -266,13 +271,14 @@
           }
 
           SolrDocument doc = solrDocs.get(currentDoc);
-          String digest = (String) doc.getFieldValue(SolrConstants.DIGEST_FIELD);
+          String digest = (String) doc
+              .getFieldValue(SolrConstants.DIGEST_FIELD);
           key.set(digest);
           value.readSolrDocument(doc);
 
           currentDoc++;
           return true;
-        }    
+        }
       };
     }
   }
@@ -304,7 +310,6 @@
     }
   }
 
-
   public void close() throws IOException {
     try {
       if (numDeletes > 0) {
@@ -322,13 +327,13 @@
 
   public void reduce(Text key, Iterator<SolrRecord> values,
       OutputCollector<Text, SolrRecord> output, Reporter reporter)
-  throws IOException {
+      throws IOException {
     SolrRecord recordToKeep = new SolrRecord(values.next());
     while (values.hasNext()) {
       SolrRecord solrRecord = values.next();
-      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
-          (solrRecord.getBoost() == recordToKeep.getBoost() && 
-              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
+      if (solrRecord.getBoost() > recordToKeep.getBoost()
+          || (solrRecord.getBoost() == recordToKeep.getBoost() && solrRecord
+              .getTstamp() > recordToKeep.getTstamp())) {
         updateRequest.deleteById(recordToKeep.id);
         recordToKeep = new SolrRecord(solrRecord);
       } else {
@@ -338,7 +343,8 @@
       reporter.incrCounter("SolrDedupStatus", "Deleted documents", 1);
       if (numDeletes >= NUM_MAX_DELETE_REQUEST) {
         try {
-          LOG.info("SolrDeleteDuplicates: deleting " + numDeletes + " duplicates");
+          LOG.info("SolrDeleteDuplicates: deleting " + numDeletes
+              + " duplicates");
           updateRequest.process(solr);
         } catch (SolrServerException e) {
           throw new IOException(e);
@@ -358,7 +364,7 @@
     long start = System.currentTimeMillis();
     LOG.info("SolrDeleteDuplicates: starting at " + sdf.format(start));
     LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
-    
+
     JobConf job = new NutchJob(getConf());
 
     job.set(SolrConstants.SERVER_URL, solrUrl);
@@ -373,7 +379,8 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("SolrDeleteDuplicates: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("SolrDeleteDuplicates: finished at " + sdf.format(end)
+        + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public int run(String[] args) throws IOException {
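
To see the reduce-side rule in isolation, here is a hedged restatement (not part of
the patch) of the keep/delete decision described in the class Javadoc above: higher
boost wins, and on equal boost the later timestamp wins. The helper class and method
names are illustrative; SolrRecord and its getters are the ones shown in this file.

    import org.apache.nutch.indexer.solr.SolrDeleteDuplicates.SolrRecord;

    /** Illustrative only: the duplicate-resolution rule from the Javadoc above. */
    class DedupRule {
      /** True if candidate should replace the record kept so far. */
      static boolean shouldReplaceKept(SolrRecord kept, SolrRecord candidate) {
        if (candidate.getBoost() != kept.getBoost())
          return candidate.getBoost() > kept.getBoost(); // higher boost wins
        return candidate.getTstamp() > kept.getTstamp(); // tie: later timestamp wins
      }
    }
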
Index: src/java/org/apache/nutch/indexer/solr/SolrUtils.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrUtils.java	(working copy)
@@ -31,8 +31,9 @@
 
   public static Logger LOG = LoggerFactory.getLogger(SolrIndexer.class);
 
-  public static CommonsHttpSolrServer getCommonsHttpSolrServer(JobConf job) throws MalformedURLException {
-    HttpClient client=new HttpClient();
+  public static CommonsHttpSolrServer getCommonsHttpSolrServer(JobConf job)
+      throws MalformedURLException {
+    HttpClient client = new HttpClient();
 
     // Check for username/password
     if (job.getBoolean(SolrConstants.USE_AUTH, false)) {
@@ -40,9 +41,13 @@
 
       LOG.info("Authenticating as: " + username);
 
-      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT,
+          AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
 
-      client.getState().setCredentials(scope, new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+      client.getState().setCredentials(
+          scope,
+          new UsernamePasswordCredentials(username, job
+              .get(SolrConstants.PASSWORD)));
 
       HttpClientParams params = client.getParams();
       params.setAuthenticationPreemptive(true);
@@ -60,9 +65,12 @@
     for (int i = 0; i < input.length(); i++) {
       ch = input.charAt(i);
 
-      // Strip all non-characters http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
-      // and non-printable control characters except tabulator, new line and carriage return
-      if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step 0x10000
+      // Strip all non-characters
+      // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Noncharacter_Code_Point=True:]
+      // and non-printable control characters except tabulator, new line
+      // and carriage return
+      if (ch % 0x10000 != 0xffff && // 0xffff - 0x10ffff range step
+          // 0x10000
           ch % 0x10000 != 0xfffe && // 0xfffe - 0x10fffe range
           (ch <= 0xfdd0 || ch >= 0xfdef) && // 0xfdd0 - 0xfdef
           (ch > 0x1F || ch == 0x9 || ch == 0xa || ch == 0xd)) {
Index: src/java/org/apache/nutch/indexer/solr/SolrClean.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrClean.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrClean.java	(working copy)
@@ -48,13 +48,13 @@
 import org.apache.solr.client.solrj.request.UpdateRequest;
 
 /**
-* The class scans CrawlDB looking for entries with status DB_GONE (404) and sends delete requests to Solr
-* for those documents.
-* 
-* 
-* @author Claudio Martella
-*
-*/
+ * This class scans the CrawlDB looking for entries with status DB_GONE (404) and
+ * sends delete requests to Solr for those documents.
+ * 
+ * 
+ * @author Claudio Martella
+ * 
+ */
 
 public class SolrClean implements Tool {
   public static final Logger LOG = LoggerFactory.getLogger(SolrClean.class);
@@ -70,14 +70,17 @@
     this.conf = conf;
   }
 
-  public static class DBFilter implements Mapper<Text, CrawlDatum, ByteWritable, Text> {
+  public static class DBFilter implements
+      Mapper<Text, CrawlDatum, ByteWritable, Text> {
     private ByteWritable OUT = new ByteWritable(CrawlDatum.STATUS_DB_GONE);
 
     @Override
-    public void configure(JobConf arg0) { }
+    public void configure(JobConf arg0) {
+    }
 
     @Override
-    public void close() throws IOException { }
+    public void close() throws IOException {
+    }
 
     @Override
     public void map(Text key, CrawlDatum value,
@@ -90,7 +93,8 @@
     }
   }
 
-  public static class SolrDeleter implements Reducer<ByteWritable, Text, Text, ByteWritable> {
+  public static class SolrDeleter implements
+      Reducer<ByteWritable, Text, Text, ByteWritable> {
     private static final int NUM_MAX_DELETE_REQUEST = 1000;
     private int numDeletes = 0;
     private int totalDeleted = 0;
@@ -130,7 +134,7 @@
     @Override
     public void reduce(ByteWritable key, Iterator<Text> values,
         OutputCollector<Text, ByteWritable> output, Reporter reporter)
-    throws IOException {
+        throws IOException {
       while (values.hasNext()) {
         Text document = values.next();
         updateRequest.deleteById(document.toString());
@@ -151,7 +155,8 @@
     }
   }
 
-  public void delete(String crawldb, String solrUrl, boolean noCommit) throws IOException {
+  public void delete(String crawldb, String solrUrl, boolean noCommit)
+      throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("SolrClean: starting at " + sdf.format(start));
@@ -171,7 +176,8 @@
     JobClient.runJob(job);
 
     long end = System.currentTimeMillis();
-    LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("SolrClean: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
   public int run(String[] args) throws IOException {
@@ -191,8 +197,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    int result = ToolRunner.run(NutchConfiguration.create(),
-        new SolrClean(), args);
+    int result = ToolRunner.run(NutchConfiguration.create(), new SolrClean(),
+        args);
     System.exit(result);
   }
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrConstants.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrConstants.java	(working copy)
@@ -30,15 +30,15 @@
   public static final String USERNAME = SOLR_PREFIX + "auth.username";
 
   public static final String PASSWORD = SOLR_PREFIX + "auth.password";
-  
+
   public static final String ID_FIELD = "id";
-  
+
   public static final String URL_FIELD = "url";
-  
+
   public static final String BOOST_FIELD = "boost";
-  
+
   public static final String TIMESTAMP_FIELD = "tstamp";
-  
+
   public static final String DIGEST_FIELD = "digest";
 
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrWriter.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrWriter.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrWriter.java	(working copy)
@@ -40,8 +40,7 @@
   private SolrServer solr;
   private SolrMappingReader solrMapping;
 
-  private final List<SolrInputDocument> inputDocs =
-    new ArrayList<SolrInputDocument>();
+  private final List<SolrInputDocument> inputDocs = new ArrayList<SolrInputDocument>();
 
   private int commitSize;
 
@@ -53,23 +52,24 @@
 
   public void write(NutchDocument doc) throws IOException {
     final SolrInputDocument inputDoc = new SolrInputDocument();
-    for(final Entry<String, NutchField> e : doc) {
+    for (final Entry<String, NutchField> e : doc) {
       for (final Object val : e.getValue().getValues()) {
         // normalise the string representation for a Date
         Object val2 = val;
 
-        if (val instanceof Date){
+        if (val instanceof Date) {
           val2 = DateUtil.getThreadLocalDateFormat().format(val);
         }
 
         if (e.getKey().equals("content")) {
-          val2 = SolrUtils.stripNonCharCodepoints((String)val);
+          val2 = SolrUtils.stripNonCharCodepoints((String) val);
         }
 
-        inputDoc.addField(solrMapping.mapKey(e.getKey()), val2, e.getValue().getWeight());
+        inputDoc.addField(solrMapping.mapKey(e.getKey()), val2, e.getValue()
+            .getWeight());
         String sCopy = solrMapping.mapCopyKey(e.getKey());
         if (sCopy != e.getKey()) {
-        	inputDoc.addField(sCopy, val);	
+          inputDoc.addField(sCopy, val);
         }
       }
     }
Index: src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrIndexer.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrIndexer.java	(working copy)
@@ -57,7 +57,7 @@
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
       List<Path> segments) throws IOException {
-      indexSolr(solrUrl, crawlDb, linkDb, segments, false);
+    indexSolr(solrUrl, crawlDb, linkDb, segments, false);
   }
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
@@ -76,22 +76,22 @@
 
     job.setReduceSpeculativeExecution(false);
 
-    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
-                         new Random().nextInt());
+    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-"
+        + new Random().nextInt());
 
     FileOutputFormat.setOutputPath(job, tmp);
     try {
       JobClient.runJob(job);
       // do the commits once and for all the reducers in one go
-      SolrServer solr =  SolrUtils.getCommonsHttpSolrServer(job);
+      SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
 
       if (!noCommit) {
         solr.commit();
       }
       long end = System.currentTimeMillis();
-      LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
-    }
-    catch (Exception e){
+      LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
+          + TimingUtil.elapsedTime(start, end));
+    } catch (Exception e) {
       LOG.error(e.toString());
     } finally {
       FileSystem.get(job).delete(tmp, true);
@@ -100,7 +100,8 @@
 
   public int run(String[] args) throws Exception {
     if (args.length < 3) {
-      System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
+      System.err
+          .println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
       return -1;
     }
 
@@ -112,14 +113,13 @@
     boolean noCommit = false;
 
     for (int i = 2; i < args.length; i++) {
-    	if (args[i].equals("-linkdb")) {
-    		linkDb = new Path(args[++i]);
-    	}
-    	else if (args[i].equals("-dir")) {
+      if (args[i].equals("-linkdb")) {
+        linkDb = new Path(args[++i]);
+      } else if (args[i].equals("-dir")) {
         Path dir = new Path(args[++i]);
         FileSystem fs = dir.getFileSystem(getConf());
         FileStatus[] fstats = fs.listStatus(dir,
-                HadoopFSUtil.getPassDirectoriesFilter(fs));
+            HadoopFSUtil.getPassDirectoriesFilter(fs));
         Path[] files = HadoopFSUtil.getPaths(fstats);
         for (Path p : files) {
           segments.add(p);
@@ -141,7 +141,8 @@
   }
 
   public static void main(String[] args) throws Exception {
-    final int res = ToolRunner.run(NutchConfiguration.create(), new SolrIndexer(), args);
+    final int res = ToolRunner.run(NutchConfiguration.create(),
+        new SolrIndexer(), args);
     System.exit(res);
   }
 }
Index: src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java
===================================================================
--- src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/solr/SolrMappingReader.java	(working copy)
@@ -38,16 +38,17 @@
 
 public class SolrMappingReader {
   public static Logger LOG = LoggerFactory.getLogger(SolrMappingReader.class);
-  
+
   private Configuration conf;
-  
+
   private Map<String, String> keyMap = new HashMap<String, String>();
   private Map<String, String> copyMap = new HashMap<String, String>();
   private String uniqueKey = "id";
-  
+
   public static synchronized SolrMappingReader getInstance(Configuration conf) {
     ObjectCache cache = ObjectCache.get(conf);
-    SolrMappingReader instance = (SolrMappingReader)cache.getObject(SolrMappingReader.class.getName());
+    SolrMappingReader instance = (SolrMappingReader) cache
+        .getObject(SolrMappingReader.class.getName());
     if (instance == null) {
       instance = new SolrMappingReader(conf);
       cache.setObject(SolrMappingReader.class.getName(), instance);
@@ -60,9 +61,10 @@
     parseMapping();
   }
 
-  private void parseMapping() {    
+  private void parseMapping() {
     InputStream ssInputStream = null;
-    ssInputStream = conf.getConfResourceAsInputStream(conf.get(SolrConstants.MAPPING_FILE, "solrindex-mapping.xml"));
+    ssInputStream = conf.getConfResourceAsInputStream(conf.get(
+        SolrConstants.MAPPING_FILE, "solrindex-mapping.xml"));
 
     InputSource inputSource = new InputSource(ssInputStream);
     try {
@@ -74,48 +76,50 @@
       if (fieldList.getLength() > 0) {
         for (int i = 0; i < fieldList.getLength(); i++) {
           Element element = (Element) fieldList.item(i);
-          LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
-          keyMap.put(element.getAttribute("source"), element.getAttribute("dest"));
+          LOG.info("source: " + element.getAttribute("source") + " dest: "
+              + element.getAttribute("dest"));
+          keyMap.put(element.getAttribute("source"),
+              element.getAttribute("dest"));
         }
       }
       NodeList copyFieldList = rootElement.getElementsByTagName("copyField");
       if (copyFieldList.getLength() > 0) {
         for (int i = 0; i < copyFieldList.getLength(); i++) {
           Element element = (Element) copyFieldList.item(i);
-          LOG.info("source: " + element.getAttribute("source") + " dest: " + element.getAttribute("dest"));
-          copyMap.put(element.getAttribute("source"), element.getAttribute("dest"));
+          LOG.info("source: " + element.getAttribute("source") + " dest: "
+              + element.getAttribute("dest"));
+          copyMap.put(element.getAttribute("source"),
+              element.getAttribute("dest"));
         }
       }
       NodeList uniqueKeyItem = rootElement.getElementsByTagName("uniqueKey");
       if (uniqueKeyItem.getLength() > 1) {
         LOG.warn("More than one unique key definitions found in solr index mapping, using default 'id'");
         uniqueKey = "id";
-      }
-      else if (uniqueKeyItem.getLength() == 0) {
+      } else if (uniqueKeyItem.getLength() == 0) {
         LOG.warn("No unique key definition found in solr index mapping using, default 'id'");
+      } else {
+        uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
       }
-      else{
-    	  uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
-      }
     } catch (MalformedURLException e) {
-        LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (SAXException e) {
-        LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (IOException e) {
-    	LOG.warn(e.toString());
+      LOG.warn(e.toString());
     } catch (ParserConfigurationException e) {
-    	LOG.warn(e.toString());
-    } 
+      LOG.warn(e.toString());
+    }
   }
-	  
+
   public Map<String, String> getKeyMap() {
     return keyMap;
   }
-	  
+
   public Map<String, String> getCopyMap() {
     return copyMap;
   }
-	  
+
   public String getUniqueKey() {
     return uniqueKey;
   }
@@ -128,14 +132,14 @@
   }
 
   public String mapKey(String key) throws IOException {
-    if(keyMap.containsKey(key)) {
+    if (keyMap.containsKey(key)) {
       key = (String) keyMap.get(key);
     }
     return key;
   }
 
   public String mapCopyKey(String key) throws IOException {
-    if(copyMap.containsKey(key)) {
+    if (copyMap.containsKey(key)) {
       key = (String) copyMap.get(key);
     }
     return key;
Index: src/java/org/apache/nutch/indexer/NutchDocument.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchDocument.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/NutchDocument.java	(working copy)
@@ -31,12 +31,12 @@
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.nutch.metadata.Metadata;
 
-/** A {@link NutchDocument} is the unit of indexing.*/
-public class NutchDocument
-implements Writable, Iterable<Entry<String, NutchField>> {
+/** A {@link NutchDocument} is the unit of indexing. */
+public class NutchDocument implements Writable,
+    Iterable<Entry<String, NutchField>> {
 
   public static final byte VERSION = 2;
-  
+
   private Map<String, NutchField> fields;
 
   private Metadata documentMeta;
Index: src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java	(working copy)
@@ -24,66 +24,69 @@
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * Reads and parses a URL and run the indexers on it. Displays the fields obtained and the first
- * 100 characters of their value
+ * Reads and parses a URL and runs the indexers on it. Displays the fields
+ * obtained and the first 100 characters of their value.
  * 
- * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker http://www.lemonde.fr
+ * Tested with e.g. ./nutch org.apache.nutch.indexer.IndexingFiltersChecker
+ * http://www.lemonde.fr
+ * 
  * @author Julien Nioche
  **/
 
 public class IndexingFiltersChecker extends Configured implements Tool {
-  
-  public static final Logger LOG = LoggerFactory.getLogger(IndexingFiltersChecker.class);
-  
+
+  public static final Logger LOG = LoggerFactory
+      .getLogger(IndexingFiltersChecker.class);
+
   public IndexingFiltersChecker() {
 
   }
-  
+
   public int run(String[] args) throws Exception {
-    
+
     String contentType = null;
     String url = null;
-    
+
     String usage = "Usage: IndexingFiltersChecker <url>";
-    
+
     if (args.length != 1) {
       System.err.println(usage);
       System.exit(-1);
     }
-    
+
     url = args[0];
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("fetching: " + url);
     }
-        
+
     IndexingFilters indexers = new IndexingFilters(conf);
-    
+
     ProtocolFactory factory = new ProtocolFactory(conf);
     Protocol protocol = factory.getProtocol(url);
     CrawlDatum datum = new CrawlDatum();
-    
+
     Content content = protocol.getProtocolOutput(new Text(url), datum)
         .getContent();
-    
+
     if (content == null) {
       System.out.println("No content for " + url);
       return 0;
     }
-    
+
     contentType = content.getContentType();
-    
+
     if (contentType == null) {
       return -1;
     }
-    
+
     if (LOG.isInfoEnabled()) {
       LOG.info("parsing: " + url);
       LOG.info("contentType: " + contentType);
     }
-    
+
     ParseResult parseResult = new ParseUtil(conf).parse(content);
-    
+
     NutchDocument doc = new NutchDocument();
     Text urlText = new Text(url);
 
@@ -107,19 +110,19 @@
     }
     return 0;
   }
-  
+
   public static void main(String[] args) throws Exception {
     final int res = ToolRunner.run(NutchConfiguration.create(),
         new IndexingFiltersChecker(), args);
     System.exit(res);
   }
-  
+
   Configuration conf;
-  
+
   public Configuration getConf() {
     return conf;
   }
-  
+
   @Override
   public void setConf(Configuration arg0) {
     conf = arg0;
Index: src/java/org/apache/nutch/indexer/IndexerMapReduce.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerMapReduce.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexerMapReduce.java	(working copy)
@@ -48,11 +48,12 @@
 import org.apache.nutch.scoring.ScoringFilterException;
 import org.apache.nutch.scoring.ScoringFilters;
 
-public class IndexerMapReduce extends Configured
-implements Mapper<Text, Writable, Text, NutchWritable>,
-          Reducer<Text, NutchWritable, Text, NutchDocument> {
+public class IndexerMapReduce extends Configured implements
+    Mapper<Text, Writable, Text, NutchWritable>,
+    Reducer<Text, NutchWritable, Text, NutchDocument> {
 
-  public static final Logger LOG = LoggerFactory.getLogger(IndexerMapReduce.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(IndexerMapReduce.class);
 
   private IndexingFilters filters;
   private ScoringFilters scfilters;
@@ -64,13 +65,14 @@
   }
 
   public void map(Text key, Writable value,
-      OutputCollector<Text, NutchWritable> output, Reporter reporter) throws IOException {
+      OutputCollector<Text, NutchWritable> output, Reporter reporter)
+      throws IOException {
     output.collect(key, new NutchWritable(value));
   }
 
   public void reduce(Text key, Iterator<NutchWritable> values,
-                     OutputCollector<Text, NutchDocument> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, NutchDocument> output, Reporter reporter)
+      throws IOException {
     Inlinks inlinks = null;
     CrawlDatum dbDatum = null;
     CrawlDatum fetchDatum = null;
@@ -79,38 +81,38 @@
     while (values.hasNext()) {
       final Writable value = values.next().get(); // unwrap
       if (value instanceof Inlinks) {
-        inlinks = (Inlinks)value;
+        inlinks = (Inlinks) value;
       } else if (value instanceof CrawlDatum) {
-        final CrawlDatum datum = (CrawlDatum)value;
+        final CrawlDatum datum = (CrawlDatum) value;
         if (CrawlDatum.hasDbStatus(datum))
           dbDatum = datum;
         else if (CrawlDatum.hasFetchStatus(datum)) {
           // don't index unmodified (empty) pages
           if (datum.getStatus() != CrawlDatum.STATUS_FETCH_NOTMODIFIED)
             fetchDatum = datum;
-        } else if (CrawlDatum.STATUS_LINKED == datum.getStatus() ||
-                   CrawlDatum.STATUS_SIGNATURE == datum.getStatus() ||
-                   CrawlDatum.STATUS_PARSE_META == datum.getStatus()) {
+        } else if (CrawlDatum.STATUS_LINKED == datum.getStatus()
+            || CrawlDatum.STATUS_SIGNATURE == datum.getStatus()
+            || CrawlDatum.STATUS_PARSE_META == datum.getStatus()) {
           continue;
         } else {
-          throw new RuntimeException("Unexpected status: "+datum.getStatus());
+          throw new RuntimeException("Unexpected status: " + datum.getStatus());
         }
       } else if (value instanceof ParseData) {
-        parseData = (ParseData)value;
+        parseData = (ParseData) value;
       } else if (value instanceof ParseText) {
-        parseText = (ParseText)value;
+        parseText = (ParseText) value;
       } else if (LOG.isWarnEnabled()) {
-        LOG.warn("Unrecognized type: "+value.getClass());
+        LOG.warn("Unrecognized type: " + value.getClass());
       }
     }
 
-    if (fetchDatum == null || dbDatum == null
-        || parseText == null || parseData == null) {
-      return;                                     // only have inlinks
+    if (fetchDatum == null || dbDatum == null || parseText == null
+        || parseData == null) {
+      return; // only have inlinks
     }
 
-    if (!parseData.getStatus().isSuccess() ||
-        fetchDatum.getStatus() != CrawlDatum.STATUS_FETCH_SUCCESS) {
+    if (!parseData.getStatus().isSuccess()
+        || fetchDatum.getStatus() != CrawlDatum.STATUS_FETCH_SUCCESS) {
       return;
     }
 
@@ -127,14 +129,17 @@
     try {
       // extract information from dbDatum and pass it to
       // fetchDatum so that indexing filters can use it
-      final Text url = (Text) dbDatum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
+      final Text url = (Text) dbDatum.getMetaData().get(
+          Nutch.WRITABLE_REPR_URL_KEY);
       if (url != null) {
         fetchDatum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY, url);
       }
       // run indexing filters
       doc = this.filters.filter(doc, parse, key, fetchDatum, inlinks);
     } catch (final IndexingException e) {
-      if (LOG.isWarnEnabled()) { LOG.warn("Error indexing "+key+": "+e); }
+      if (LOG.isWarnEnabled()) {
+        LOG.warn("Error indexing " + key + ": " + e);
+      }
       reporter.incrCounter("IndexerStatus", "Errors", 1);
       return;
     }
@@ -148,8 +153,8 @@
     float boost = 1.0f;
     // run scoring filters
     try {
-      boost = this.scfilters.indexerScore(key, doc, dbDatum,
-              fetchDatum, parse, inlinks, boost);
+      boost = this.scfilters.indexerScore(key, doc, dbDatum, fetchDatum, parse,
+          inlinks, boost);
     } catch (final ScoringFilterException e) {
       if (LOG.isWarnEnabled()) {
         LOG.warn("Error calculating score " + key + ": " + e);
@@ -166,30 +171,32 @@
     output.collect(key, doc);
   }
 
-  public void close() throws IOException { }
+  public void close() throws IOException {
+  }
 
   public static void initMRJob(Path crawlDb, Path linkDb,
-                           Collection<Path> segments,
-                           JobConf job) {
+      Collection<Path> segments, JobConf job) {
 
     LOG.info("IndexerMapReduce: crawldb: " + crawlDb);
-    
-    if (linkDb!=null)
+
+    if (linkDb != null)
       LOG.info("IndexerMapReduce: linkdb: " + linkDb);
 
     for (final Path segment : segments) {
       LOG.info("IndexerMapReduces: adding segment: " + segment);
-      FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));
-      FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.FETCH_DIR_NAME));
+      FileInputFormat.addInputPath(job, new Path(segment,
+          CrawlDatum.PARSE_DIR_NAME));
       FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));
       FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));
     }
 
     FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
-    
-    if (linkDb!=null)
-	  FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
-    
+
+    if (linkDb != null)
+      FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
+
     job.setInputFormat(SequenceFileInputFormat.class);
 
     job.setMapperClass(IndexerMapReduce.class);
Index: src/java/org/apache/nutch/indexer/NutchField.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchField.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/NutchField.java	(working copy)
@@ -27,34 +27,34 @@
 import org.apache.hadoop.io.Writable;
 
 /**
- * This class represents a multi-valued field with a weight. Values are arbitrary
- * objects.
+ * This class represents a multi-valued field with a weight. Values are
+ * arbitrary objects.
  */
 public class NutchField implements Writable {
   private float weight;
   private List<Object> values = new ArrayList<Object>();
-  
+
   public NutchField() {
-    
+
   }
-  
+
   public NutchField(Object value) {
     this(value, 1.0f);
   }
-  
+
   public NutchField(Object value, float weight) {
     this.weight = weight;
     if (value instanceof Collection) {
-      values.addAll((Collection<Object>)value);
+      values.addAll((Collection<Object>) value);
     } else {
       values.add(value);
     }
   }
-  
+
   public void add(Object value) {
     values.add(value);
   }
-  
+
   public float getWeight() {
     return weight;
   }
@@ -66,7 +66,7 @@
   public List<Object> getValues() {
     return values;
   }
-  
+
   public void reset() {
     weight = 1.0f;
     values.clear();
Index: src/java/org/apache/nutch/indexer/IndexerOutputFormat.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexerOutputFormat.java	(working copy)
@@ -31,13 +31,13 @@
   @Override
   public RecordWriter<Text, NutchDocument> getRecordWriter(FileSystem ignored,
       JobConf job, String name, Progressable progress) throws IOException {
-    
+
     // populate JobConf with field indexing options
     IndexingFilters filters = new IndexingFilters(job);
-    
-    final NutchIndexWriter[] writers =
-      NutchIndexWriterFactory.getNutchIndexWriters(job);
 
+    final NutchIndexWriter[] writers = NutchIndexWriterFactory
+        .getNutchIndexWriters(job);
+
     for (final NutchIndexWriter writer : writers) {
       writer.open(job, name);
     }
Index: src/java/org/apache/nutch/indexer/IndexingFilter.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilter.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexingFilter.java	(working copy)
@@ -28,9 +28,9 @@
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.plugin.Pluggable;
 
-
-/** Extension point for indexing.  Permits one to add metadata to the indexed
- * fields.  All plugins found which implement this extension point are run
+/**
+ * Extension point for indexing. Permits one to add metadata to the indexed
+ * fields. All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
 public interface IndexingFilter extends Pluggable, Configurable {
@@ -39,17 +39,23 @@
 
   /**
    * Adds fields or otherwise modifies the document that will be indexed for a
-   * parse. Unwanted documents can be removed from indexing by returning a null value.
+   * parse. Unwanted documents can be removed from indexing by returning a null
+   * value.
    * 
-   * @param doc document instance for collecting fields
-   * @param parse parse data instance
-   * @param url page url
-   * @param datum crawl datum for the page
-   * @param inlinks page inlinks
-   * @return modified (or a new) document instance, or null (meaning the document
-   * should be discarded)
+   * @param doc
+   *          document instance for collecting fields
+   * @param parse
+   *          parse data instance
+   * @param url
+   *          page url
+   * @param datum
+   *          crawl datum for the page
+   * @param inlinks
+   *          page inlinks
+   * @return modified (or a new) document instance, or null (meaning the
+   *         document should be discarded)
    * @throws IndexingException
    */
-  NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
-    throws IndexingException;
+  NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException;
 }
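
A hedged example (not part of the patch) of the extension point described above: a
minimal filter that adds a single field and returns null to discard documents it
cannot handle. Only the filter method and the Configurable plumbing are shown; the
class name and the "host" field are illustrative, and a real plugin also needs a
plugin descriptor and, depending on the Nutch version, possibly further interface
methods.

    import java.net.MalformedURLException;
    import java.net.URL;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.Text;
    import org.apache.nutch.crawl.CrawlDatum;
    import org.apache.nutch.crawl.Inlinks;
    import org.apache.nutch.indexer.IndexingException;
    import org.apache.nutch.indexer.IndexingFilter;
    import org.apache.nutch.indexer.NutchDocument;
    import org.apache.nutch.parse.Parse;

    public class HostFilterSketch implements IndexingFilter {

      private Configuration conf;

      public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
          CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        try {
          doc.add("host", new URL(url.toString()).getHost()); // one extra indexed field
        } catch (MalformedURLException e) {
          return null; // discard documents whose URL cannot be parsed
        }
        return doc;
      }

      public void setConf(Configuration conf) {
        this.conf = conf;
      }

      public Configuration getConf() {
        return conf;
      }
    }
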
Index: src/java/org/apache/nutch/indexer/IndexingFilters.java
===================================================================
--- src/java/org/apache/nutch/indexer/IndexingFilters.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/IndexingFilters.java	(working copy)
@@ -32,12 +32,13 @@
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.hadoop.io.Text;
 
-/** Creates and caches {@link IndexingFilter} implementing plugins.*/
+/** Creates and caches {@link IndexingFilter} implementing plugins. */
 public class IndexingFilters {
 
   public static final String INDEXINGFILTER_ORDER = "indexingfilter.order";
 
-  public final static Logger LOG = LoggerFactory.getLogger(IndexingFilters.class);
+  public final static Logger LOG = LoggerFactory
+      .getLogger(IndexingFilters.class);
 
   private IndexingFilter[] indexingFilters;
 
@@ -62,8 +63,7 @@
         if (point == null)
           throw new RuntimeException(IndexingFilter.X_POINT_ID + " not found.");
         Extension[] extensions = point.getExtensions();
-        HashMap<String, IndexingFilter> filterMap =
-          new HashMap<String, IndexingFilter>();
+        HashMap<String, IndexingFilter> filterMap = new HashMap<String, IndexingFilter>();
         for (int i = 0; i < extensions.length; i++) {
           Extension extension = extensions[i];
           IndexingFilter filter = (IndexingFilter) extension
@@ -78,21 +78,19 @@
          * indeterminate order
          */
         if (orderedFilters == null) {
-          objectCache.setObject(IndexingFilter.class.getName(),
-              filterMap.values().toArray(
-                  new IndexingFilter[0]));
+          objectCache.setObject(IndexingFilter.class.getName(), filterMap
+              .values().toArray(new IndexingFilter[0]));
           /* Otherwise run the filters in the required order */
         } else {
           ArrayList<IndexingFilter> filters = new ArrayList<IndexingFilter>();
           for (int i = 0; i < orderedFilters.length; i++) {
-            IndexingFilter filter = filterMap
-                .get(orderedFilters[i]);
+            IndexingFilter filter = filterMap.get(orderedFilters[i]);
             if (filter != null) {
               filters.add(filter);
             }
           }
-          objectCache.setObject(IndexingFilter.class.getName(), filters
-              .toArray(new IndexingFilter[filters.size()]));
+          objectCache.setObject(IndexingFilter.class.getName(),
+              filters.toArray(new IndexingFilter[filters.size()]));
         }
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
@@ -100,15 +98,16 @@
       this.indexingFilters = (IndexingFilter[]) objectCache
           .getObject(IndexingFilter.class.getName());
     }
-  }                  
+  }
 
   /** Run all defined filters. */
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum,
-      Inlinks inlinks) throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
     for (int i = 0; i < this.indexingFilters.length; i++) {
       doc = this.indexingFilters[i].filter(doc, parse, url, datum, inlinks);
       // break the loop if an indexing filter discards the doc
-      if (doc == null) return null;
+      if (doc == null)
+        return null;
     }
 
     return doc;
Index: src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java
===================================================================
--- src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java	(revision 1188252)
+++ src/java/org/apache/nutch/indexer/NutchIndexWriterFactory.java	(working copy)
@@ -26,8 +26,8 @@
     for (int i = 0; i < classes.length; i++) {
       final String clazz = classes[i];
       try {
-        final Class<NutchIndexWriter> implClass =
-          (Class<NutchIndexWriter>) Class.forName(clazz);
+        final Class<NutchIndexWriter> implClass = (Class<NutchIndexWriter>) Class
+            .forName(clazz);
         writers[i] = implClass.newInstance();
       } catch (final Exception e) {
         throw new RuntimeException("Couldn't create " + clazz, e);
@@ -37,7 +37,7 @@
   }
 
   public static void addClassToConf(Configuration conf,
-                                    Class<? extends NutchIndexWriter> clazz) {
+      Class<? extends NutchIndexWriter> clazz) {
     final String classes = conf.get("indexer.writer.classes");
     final String newClass = clazz.getName();
 
Index: src/java/org/apache/nutch/plugin/PluginRuntimeException.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginRuntimeException.java	(working copy)
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 /**
  * <code>PluginRuntimeException</code> will be thrown until a exception in the
  * plugin managemnt occurs.
Index: src/java/org/apache/nutch/plugin/CircularDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/CircularDependencyException.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/CircularDependencyException.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.plugin;
 
-
 /**
  * <code>CircularDependencyException</code> will be thrown if a circular
  * dependency is detected.
Index: src/java/org/apache/nutch/plugin/Pluggable.java
===================================================================
--- src/java/org/apache/nutch/plugin/Pluggable.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/Pluggable.java	(working copy)
@@ -17,16 +17,15 @@
 package org.apache.nutch.plugin;
 
 /**
- * Defines the capability of a class to be plugged into Nutch.
- * This is a common interface that must be implemented by all
- * Nutch Extension Points.
- *
+ * Defines the capability of a class to be plugged into Nutch. This is a common
+ * interface that must be implemented by all Nutch Extension Points.
+ * 
  * @author J&eacute;r&ocirc;me Charron
- *
+ * 
  * @see <a href="http://wiki.apache.org/nutch/AboutPlugins">About Plugins</a>
- * @see <a href="package-summary.html#package_description">
- *      plugin package description</a>
+ * @see <a href="package-summary.html#package_description"> plugin package
+ *      description</a>
  */
 public interface Pluggable {
-  
+
 }
Index: src/java/org/apache/nutch/plugin/PluginManifestParser.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginManifestParser.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginManifestParser.java	(working copy)
@@ -39,8 +39,8 @@
 import org.xml.sax.SAXException;
 
 /**
- * The <code>PluginManifestParser</code> parser just parse the manifest file
- * in all plugin directories.
+ * The <code>PluginManifestParser</code> parser just parses the manifest file in
+ * all plugin directories.
  * 
  * @author joa23
  */
@@ -185,7 +185,7 @@
     PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name,
         providerName, pluginClazz, pPath, this.conf);
     LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version
-          + " provider=" + providerName + "class=" + pluginClazz);
+        + " provider=" + providerName + "class=" + pluginClazz);
     parseExtension(rootElement, pluginDescriptor);
     parseExtensionPoints(rootElement, pluginDescriptor);
     parseLibraries(rootElement, pluginDescriptor);
@@ -292,8 +292,8 @@
             if (parameters != null) {
               for (int k = 0; k < parameters.getLength(); k++) {
                 Element param = (Element) parameters.item(k);
-                extension.addAttribute(param.getAttribute(ATTR_NAME), param
-                    .getAttribute("value"));
+                extension.addAttribute(param.getAttribute(ATTR_NAME),
+                    param.getAttribute("value"));
               }
             }
             pPluginDescriptor.addExtension(extension);
Index: src/java/org/apache/nutch/plugin/ExtensionPoint.java
===================================================================
--- src/java/org/apache/nutch/plugin/ExtensionPoint.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/ExtensionPoint.java	(working copy)
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 package org.apache.nutch.plugin;
+
 import java.util.ArrayList;
 
 /**
@@ -76,7 +77,8 @@
   /**
    * Sets the extensionPointId.
    * 
-   * @param pId extension point id
+   * @param pId
+   *          extension point id
    */
   private void setId(String pId) {
     ftId = pId;
Index: src/java/org/apache/nutch/plugin/MissingDependencyException.java
===================================================================
--- src/java/org/apache/nutch/plugin/MissingDependencyException.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/MissingDependencyException.java	(working copy)
@@ -17,8 +17,8 @@
 package org.apache.nutch.plugin;
 
 /**
- * <code>MissingDependencyException</code> will be thrown if a plugin
- * dependency cannot be found.
+ * <code>MissingDependencyException</code> will be thrown if a plugin dependency
+ * cannot be found.
  * 
  * @author J&eacute;r&ocirc;me Charron
  */
Index: src/java/org/apache/nutch/plugin/Extension.java
===================================================================
--- src/java/org/apache/nutch/plugin/Extension.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/Extension.java	(working copy)
@@ -98,8 +98,10 @@
    * Adds a attribute and is only used until model creation at plugin system
    * start up.
    * 
-   * @param pKey a key
-   * @param pValue a value
+   * @param pKey
+   *          a key
+   * @param pValue
+   *          a value
    */
   public void addAttribute(String pKey, String pValue) {
     fAttributes.put(pKey, pValue);
@@ -109,7 +111,8 @@
    * Sets the Class that implement the concret extension and is only used until
    * model creation at system start up.
    * 
-   * @param extensionClazz The extensionClasname to set
+   * @param extensionClazz
+   *          The extension class name to set
    */
   public void setClazz(String extensionClazz) {
     fClazz = extensionClazz;
@@ -119,7 +122,8 @@
    * Sets the unique extension Id and is only used until model creation at
    * system start up.
    * 
-   * @param extensionID The extensionID to set
+   * @param extensionID
+   *          The extensionID to set
    */
   public void setId(String extensionID) {
     fId = extensionID;
Index: src/java/org/apache/nutch/plugin/PluginDescriptor.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginDescriptor.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginDescriptor.java	(working copy)
@@ -30,12 +30,11 @@
 import org.apache.hadoop.conf.Configuration;
 
 /**
- * The <code>PluginDescriptor</code> provide access to all meta information of
- * a nutch-plugin, as well to the internationalizable resources and the plugin
- * own classloader. There are meta information about <code>Plugin</code>,
- * <code>ExtensionPoint</code> and <code>Extension</code>. To provide
- * access to the meta data of a plugin via a descriptor allow a lazy loading
- * mechanism.
+ * The <code>PluginDescriptor</code> provides access to all meta information of
+ * a nutch-plugin, as well as to the internationalizable resources and the
+ * plugin's own classloader. There is meta information about <code>Plugin</code>,
+ * <code>ExtensionPoint</code> and <code>Extension</code>. Providing access to
+ * the meta data of a plugin via a descriptor allows a lazy loading mechanism.
  * 
  * @author joa23
  */
@@ -53,7 +52,8 @@
   private ArrayList<URL> fNotExportedLibs = new ArrayList<URL>();
   private ArrayList<Extension> fExtensions = new ArrayList<Extension>();
   private PluginClassLoader fClassLoader;
-  public static final Logger LOG = LoggerFactory.getLogger(PluginDescriptor.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginDescriptor.class);
   private Configuration fConf;
 
   /**
@@ -206,7 +206,8 @@
   /**
    * Adds a dependency
    * 
-   * @param pId id of the dependent plugin
+   * @param pId
+   *          id of the dependent plugin
    */
   public void addDependency(String pId) {
     fDependencies.add(pId);
@@ -285,8 +286,8 @@
       LOG.debug(getPluginId() + " " + e.toString());
     }
     URL[] urls = arrayList.toArray(new URL[arrayList.size()]);
-    fClassLoader = new PluginClassLoader(urls, PluginDescriptor.class
-        .getClassLoader());
+    fClassLoader = new PluginClassLoader(urls,
+        PluginDescriptor.class.getClassLoader());
     return fClassLoader;
   }
 
@@ -308,7 +309,7 @@
     for (String id : pDescriptor.getDependencies()) {
       PluginDescriptor descriptor = PluginRepository.get(fConf)
           .getPluginDescriptor(id);
-      for (URL url: descriptor.getExportedLibUrls()) {
+      for (URL url : descriptor.getExportedLibUrls()) {
         pLibs.add(url);
       }
       collectLibs(pLibs, descriptor);
Index: src/java/org/apache/nutch/plugin/Plugin.java
===================================================================
--- src/java/org/apache/nutch/plugin/Plugin.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/Plugin.java	(working copy)
@@ -33,8 +33,8 @@
  * The <code>Plugin</code> will be startuped and shutdown by the nutch plugin
  * management system.
  * 
- * A possible usecase of the <code>Plugin</code> implementation is to create
- * or close a database connection.
+ * A possible use case of the <code>Plugin</code> implementation is to create
+ * or close a database connection.
  * 
  * @author joa23
  */
@@ -81,7 +81,8 @@
   }
 
   /**
-   * @param descriptor The descriptor to set
+   * @param descriptor
+   *          The descriptor to set
    */
   private void setDescriptor(PluginDescriptor descriptor) {
     fDescriptor = descriptor;
Index: src/java/org/apache/nutch/plugin/PluginRepository.java
===================================================================
--- src/java/org/apache/nutch/plugin/PluginRepository.java	(revision 1188252)
+++ src/java/org/apache/nutch/plugin/PluginRepository.java	(working copy)
@@ -55,7 +55,8 @@
 
   private Configuration conf;
 
-  public static final Logger LOG = LoggerFactory.getLogger(PluginRepository.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(PluginRepository.class);
 
   /**
    * @throws PluginRuntimeException
@@ -80,7 +81,7 @@
     try {
       installExtensions(fRegisteredPlugins);
     } catch (PluginRuntimeException e) {
-        LOG.error(e.toString());
+      LOG.error(e.toString());
       throw new RuntimeException(e.getMessage());
     }
     displayStatus();
@@ -107,8 +108,8 @@
       return;
     }
 
-    for (PluginDescriptor plugin: plugins) {
-      for(ExtensionPoint point:plugin.getExtenstionPoints()) {
+    for (PluginDescriptor plugin : plugins) {
+      for (ExtensionPoint point : plugin.getExtenstionPoints()) {
         String xpId = point.getId();
         LOG.debug("Adding extension point " + xpId);
         fExtensionPoints.put(xpId, point);
@@ -123,7 +124,7 @@
       throws PluginRuntimeException {
 
     for (PluginDescriptor descriptor : pRegisteredPlugins) {
-      for(Extension extension:descriptor.getExtensions()) {
+      for (Extension extension : descriptor.getExtensions()) {
         String xpId = extension.getTargetPoint();
         ExtensionPoint point = getExtensionPoint(xpId);
         if (point == null) {
@@ -151,7 +152,7 @@
     branch.put(plugin.getPluginId(), plugin);
 
     // Otherwise, checks each dependency
-    for(String id:plugin.getDependencies()) {
+    for (String id : plugin.getDependencies()) {
       PluginDescriptor dependency = plugins.get(id);
       if (dependency == null) {
         throw new MissingDependencyException("Missing dependency " + id
Index: src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
===================================================================
--- src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(revision 1188252)
+++ src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java	(working copy)
@@ -30,12 +30,12 @@
 import org.apache.nutch.parse.Parse;
 
 /**
- * Indexing filter that indexes all inbound anchor text for a document. 
+ * Indexing filter that indexes all inbound anchor text for a document.
  */
-public class AnchorIndexingFilter
-  implements IndexingFilter {
+public class AnchorIndexingFilter implements IndexingFilter {
 
-  public static final Logger LOG = LoggerFactory.getLogger(AnchorIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(AnchorIndexingFilter.class);
   private Configuration conf;
   private boolean deduplicate = false;
 
@@ -50,14 +50,13 @@
     return this.conf;
   }
 
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum,
-    Inlinks inlinks) throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
 
-    String[] anchors = (inlinks != null ? inlinks.getAnchors()
-      : new String[0]);
+    String[] anchors = (inlinks != null ? inlinks.getAnchors() : new String[0]);
 
     // https://issues.apache.org/jira/browse/NUTCH-1037
-    WeakHashMap<String,Integer> map = new WeakHashMap<String,Integer>();
+    WeakHashMap<String, Integer> map = new WeakHashMap<String, Integer>();
 
     for (int i = 0; i < anchors.length; i++) {
       if (deduplicate) {
@@ -77,5 +76,4 @@
 
     return doc;
   }
-
 }
Index: src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java
===================================================================
--- src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java	(revision 1188252)
+++ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java	(working copy)
@@ -16,7 +16,6 @@
  */
 package org.apache.nutch.microformats.reltag;
 
-
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
@@ -29,29 +28,27 @@
 // Hadoop imports
 import org.apache.hadoop.conf.Configuration;
 
-
 /**
- * An {@link org.apache.nutch.indexer.IndexingFilter} that 
- * add <code>tag</code> field(s) to the document.
- *
+ * An {@link org.apache.nutch.indexer.IndexingFilter} that adds <code>tag</code>
+ * field(s) to the document.
+ * 
  * @see <a href="http://www.microformats.org/wiki/rel-tag">
  *      http://www.microformats.org/wiki/rel-tag</a>
  * @author J&eacute;r&ocirc;me Charron
  */
 public class RelTagIndexingFilter implements IndexingFilter {
-  
 
   private Configuration conf;
 
-
   // Inherited JavaDoc
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
-    throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
 
     // Check if some Rel-Tags found, possibly put there by RelTagParser
-    String[] tags = parse.getData().getParseMeta().getValues(RelTagParser.REL_TAG);
+    String[] tags = parse.getData().getParseMeta()
+        .getValues(RelTagParser.REL_TAG);
     if (tags != null) {
-      for (int i=0; i<tags.length; i++) {
+      for (int i = 0; i < tags.length; i++) {
         doc.add("tag", tags[i]);
       }
     }
@@ -59,10 +56,11 @@
     return doc;
   }
 
-  /* ----------------------------- *
-   * <implementation:Configurable> *
-   * ----------------------------- */
-  
+  /*
+   * ----------------------------- * <implementation:Configurable> *
+   * -----------------------------
+   */
+
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
@@ -70,9 +68,10 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
-  /* ------------------------------ *
-   * </implementation:Configurable> *
-   * ------------------------------ */
-  
+
+  /*
+   * ------------------------------ * </implementation:Configurable> *
+   * ------------------------------
+   */
+
 }
Index: src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
===================================================================
--- src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java	(revision 1188252)
+++ src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java	(working copy)
@@ -44,30 +44,27 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 
-
 /**
  * Adds microformat rel-tags of document if found.
- *
+ * 
  * @see <a href="http://www.microformats.org/wiki/rel-tag">
  *      http://www.microformats.org/wiki/rel-tag</a>
  * @author J&eacute;r&ocirc;me Charron
  */
 public class RelTagParser implements HtmlParseFilter {
-  
+
   public final static Logger LOG = LoggerFactory.getLogger(RelTagParser.class);
 
   public final static String REL_TAG = "Rel-Tag";
-  
-  
+
   private Configuration conf = null;
-  
-  
+
   /**
    * Scan the HTML document looking at possible rel-tags
    */
   public ParseResult filter(Content content, ParseResult parseResult,
-    HTMLMetaTags metaTags, DocumentFragment doc) {
-    
+      HTMLMetaTags metaTags, DocumentFragment doc) {
+
     // get parse obj
     Parse parse = parseResult.get(content.getUrl());
     // Trying to find the document's rel-tags
@@ -84,16 +81,16 @@
   private static class Parser {
 
     Set tags = null;
-    
+
     Parser(Node node) {
       tags = new TreeSet();
       parse(node);
     }
-  
+
     Set getRelTags() {
       return tags;
     }
-    
+
     void parse(Node node) {
 
       if (node.getNodeType() == Node.ELEMENT_NODE) {
@@ -117,34 +114,35 @@
           }
         }
       }
-      
+
       // Recurse
       NodeList children = node.getChildNodes();
-      for (int i=0; children != null && i<children.getLength(); i++) {
+      for (int i = 0; children != null && i < children.getLength(); i++) {
         parse(children.item(i));
       }
     }
-    
+
     private final static String parseTag(String url) {
       String tag = null;
       try {
         URL u = new URL(url);
         String path = u.getPath();
-        tag = URLDecoder.decode(path.substring(path.lastIndexOf('/') + 1), "UTF-8");
+        tag = URLDecoder.decode(path.substring(path.lastIndexOf('/') + 1),
+            "UTF-8");
       } catch (Exception e) {
         // Malformed tag...
         tag = null;
       }
       return tag;
     }
-    
+
   }
 
+  /*
+   * ----------------------------- * <implementation:Configurable> *
+   * -----------------------------
+   */
 
-  /* ----------------------------- *
-   * <implementation:Configurable> *
-   * ----------------------------- */
-  
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
@@ -152,9 +150,10 @@
   public Configuration getConf() {
     return this.conf;
   }
-  
-  /* ------------------------------ *
-   * </implementation:Configurable> *
-   * ------------------------------ */
-  
+
+  /*
+   * ------------------------------ * </implementation:Configurable> *
+   * ------------------------------
+   */
+
 }
Index: src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java
===================================================================
--- src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java	(revision 1188252)
+++ src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java	(working copy)
@@ -35,9 +35,9 @@
 import org.apache.nutch.util.domain.DomainSuffix;
 import org.apache.nutch.util.domain.DomainSuffixes;
 
-
 /**
  * Scoring filter to boost tlds.
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class TLDScoringFilter implements ScoringFilter {
@@ -56,10 +56,10 @@
     NutchField tlds = doc.getField("tld");
     float boost = 1.0f;
 
-    if(tlds != null) {
-      for(Object tld : tlds.getValues()) {
+    if (tlds != null) {
+      for (Object tld : tlds.getValues()) {
         DomainSuffix entry = tldEntries.get(tld.toString());
-        if(entry != null)
+        if (entry != null)
           boost *= entry.getBoost();
       }
     }
@@ -93,9 +93,8 @@
       throws ScoringFilterException {
   }
 
-  public void updateDbScore(Text url, CrawlDatum old,
-                            CrawlDatum datum, List<CrawlDatum> inlinked)
-  throws ScoringFilterException {
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException {
   }
 
   public Configuration getConf() {
@@ -105,9 +104,10 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, 
-          Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust,
-          int allCount) throws ScoringFilterException {
+
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
     return adjust;
   }
 
Index: src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java
===================================================================
--- src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java	(revision 1188252)
+++ src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java	(working copy)
@@ -34,23 +34,25 @@
 
 /**
  * Adds the Top level domain extensions to the index
+ * 
  * @author Enis Soztutar &lt;enis.soz.nutch@gmail.com&gt;
  */
 public class TLDIndexingFilter implements IndexingFilter {
-  public static final Logger LOG = LoggerFactory.getLogger(TLDIndexingFilter.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(TLDIndexingFilter.class);
 
   private Configuration conf;
 
-  public NutchDocument filter(NutchDocument doc, Parse parse, Text urlText, CrawlDatum datum, Inlinks inlinks)
-  throws IndexingException {
+  public NutchDocument filter(NutchDocument doc, Parse parse, Text urlText,
+      CrawlDatum datum, Inlinks inlinks) throws IndexingException {
 
     try {
       URL url = new URL(urlText.toString());
       DomainSuffix d = URLUtil.getDomainSuffix(url);
-      
+
       doc.add("tld", d.getDomain());
-      
-    }catch (Exception ex) {
+
+    } catch (Exception ex) {
       LOG.warn(ex.toString());
     }
 
Index: src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
===================================================================
--- src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(revision 1188252)
+++ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java	(working copy)
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.parse.js;
 
 import java.io.BufferedReader;
@@ -57,11 +57,10 @@
 import org.w3c.dom.NodeList;
 
 /**
- * This class is a heuristic link extractor for JavaScript files and
- * code snippets. The general idea of a two-pass regex matching comes from
- * Heritrix. Parts of the code come from OutlinkExtractor.java
- * by Stephan Strittmatter.
- *
+ * This class is a heuristic link extractor for JavaScript files and code
+ * snippets. The general idea of two-pass regex matching comes from Heritrix.
+ * Parts of the code come from OutlinkExtractor.java by Stephan Strittmatter.
+ * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
 public class JSParseFilter implements HtmlParseFilter, Parser {
@@ -70,9 +69,9 @@
   private static final int MAX_TITLE_LEN = 80;
 
   private Configuration conf;
-  
+
   public ParseResult filter(Content content, ParseResult parseResult,
-    HTMLMetaTags metaTags, DocumentFragment doc) {
+      HTMLMetaTags metaTags, DocumentFragment doc) {
 
     Parse parse = parseResult.get(content.getUrl());
 
@@ -86,37 +85,43 @@
       outlinks.addAll(list);
       ParseStatus status = parse.getData().getStatus();
       String text = parse.getText();
-      Outlink[] newlinks = (Outlink[])outlinks.toArray(new Outlink[outlinks.size()]);
-      ParseData parseData = new ParseData(status, title, newlinks,
-                                          parse.getData().getContentMeta(),
-                                          parse.getData().getParseMeta());
+      Outlink[] newlinks = (Outlink[]) outlinks.toArray(new Outlink[outlinks
+          .size()]);
+      ParseData parseData = new ParseData(status, title, newlinks, parse
+          .getData().getContentMeta(), parse.getData().getParseMeta());
 
       // replace original parse obj with new one
       parseResult.put(content.getUrl(), new ParseText(text), parseData);
     }
     return parseResult;
   }
-  
-  private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base, List outlinks) {
+
+  private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base,
+      List outlinks) {
     if (n instanceof Element) {
       String name = n.getNodeName();
       if (name.equalsIgnoreCase("script")) {
         String lang = null;
         Node lNode = n.getAttributes().getNamedItem("language");
-        if (lNode == null) lang = "javascript";
-        else lang = lNode.getNodeValue();
+        if (lNode == null)
+          