#!/usr/bin/perl -w
use strict;
#################################
# History:
#
# version 10
#    * require min_overlap of names in promote_entity_mention_to_mention_entity
#    * give 50% discount to precision and recall for attribute recognition errors
#    * trim leading/trailing whitespace from tag attributes
#    * relation type/subtype "OTHER-AFF"/"Other" added to symmetric list
#    * bug fix - always treat multi-line input strings as single string in regexp ops
#    * both TYPE and SUBTYPE must match for an entity to be "correct"
#
# version 09
#    * bug fix in calculation of relation score
#    * addition of relation mention evaluation
#    * value formula for TERN evaluation changed and simplified
#
# version 08
#    * bug fix in print_timex2_mapping (some attribute errors weren't detected)
#    * Recall/Precision/F-measure added to conditional error analysis
#    * TERN evaluation restricted to timex type = "TIMEX2"
#    * Change in timex2 value parameters
#    * in print_timex2_mapping:
#        - add attributes to printout of false alarms
#        - limit attribute listing to those being evaluated
#        - eliminate NON_SPECIFIC from the list of evaluated attributes
#    * added subtype "other" to entities of type LOCATION
#    * bug fix in extract_sgml_tag_attribute
#    * change timex2 value formula to weight value according to attributes
#
# version 07
#    * bug fix in span_overlap
#    * timex2 evaluation added
#    * printout suppressed for entities/relations/events/timex2
#      whenever there is no reference data for them
#
# version 06
#    * scoring upgrade to match ace04-evalplan-v2 (May 22, 2004)
#    * input format conversion from ALF to APF
#    * LEXICALCONDITION made optional in relation_mention
#    * bug fix - eliminate demand for extent in get_relation_argument
#
# version 05
#    * parameter sets contain modified mention coreference error weight
#
# version 04
#    * bug fix in calculating entity FA and relation argument FA values
#
# version 03
#    * Added evaluation of the entity mention task:  to detect mentions
#      separate from entity assignment and associated coreference issues.
#    * Mention false alarms are deweighted when they exist in the reference
#      as valid mentions (i.e., deweight for "mere" coreference errors).
#    * Upgraded relation and event scoring to comprehend document-level
#      (cross-document) scoring, as is done for entities.
#
# version 02
#    * Mapping speed-up by grouping entities/relations/events to be mapped
#      into minimum sized cohort sets and mapping on these smaller sets.
#    * Bug in mapping algorithm fixed.
#
# version 01
#    This utility provides evaluation functionality for the ACE program.
#    * Derives from ace-eval-v03.
#    * Upgraded to include ACE events.
#    * Converted to read data from the new DTD (v3.0.2).
#    * Uses new scoring formulas
#    * Provides scoring mode control - selection of different sets
#      of scoring parameters
#
#################################
# SCORING PARAMETERS:

# min_overlap is the minimum mutual fractional overlap allowed
# for a mention head or a name to be declared as successfully detected.
use vars qw ($min_overlap);
    $min_overlap = 0.3; #minimum fractional overlap for mention detection

# max_diff is the maximum extent difference allowed for names and 
#mentions to be declared a "match".
use vars qw ($max_diff_chars $max_diff_time $max_diff_xy);
#       max_diff_chars is the maximum extent difference
#       in characters for text sources.
    $max_diff_chars = 4;
#       max_diff_time is the maximum extent difference
#       in seconds for audio sources.
    $max_diff_time = 0.4;
#       max_diff_xy is the maximum extent difference
#       in centimeters for image mentions.
    $max_diff_xy = 0.4;

my (%entity_err_wgt, $entity_fa_wgt);
my (%entity_mention_err_wgt, $entity_mention_fa_wgt, $entity_mention_ref_fa_wgt);
my (%relation_err_wgt, $relation_fa_wgt, $relation_argument_threshold);
my (%event_err_wgt, $event_fa_wgt);
my ($event_participant_role_err_wgt, $event_participant_fa_wgt, $event_participant_threshold);
my ($timex2_fa_wgt);

my %parameter_set =
    (MINSCORE => {entity_err_wgt => {TYPE => 0.00, SUBTYPE => 0.00, CLASS => 0.00},
		  entity_fa_wgt => 1.00,
		  entity_mention_err_wgt => {TYPE => 0.00, ROLE => 0.00, STYLE => 0.00},
		  entity_mention_fa_wgt => 1.00,
		  entity_mention_ref_fa_wgt => 1.00,

		  relation_err_wgt => {TYPE => 0.00, SUBTYPE => 0.00},
		  relation_fa_wgt => 1.00,
		  relation_argument_threshold => 0.00,

		  event_err_wgt => {TYPE => 0.00, MODALITY => 0.00},
		  event_fa_wgt => 1.00,
		  event_participant_role_err_wgt => 0.00,
		  event_participant_fa_wgt => 1.00,
		  event_participant_threshold => 0.75,

		  timex2_fa_wgt => 1.00,
	      },

     HARD => {entity_err_wgt => {TYPE => 0.00, SUBTYPE => 0.50, CLASS => 0.50},
	      entity_fa_wgt => 1.00,
	      entity_mention_err_wgt => {TYPE => 0.50, ROLE => 0.50, STYLE => 0.50},
	      entity_mention_fa_wgt => 1.00,
	      entity_mention_ref_fa_wgt => 0.50,

	      relation_err_wgt => {TYPE => 0.00, SUBTYPE => 0.50},
	      relation_fa_wgt => 1.00,
	      relation_argument_threshold => 0.00,

	      event_err_wgt => {TYPE => 0.00, MODALITY => 0.50},
	      event_fa_wgt => 1.00,
	      event_participant_role_err_wgt => 0.00,
	      event_participant_fa_wgt => 1.00,
	      event_participant_threshold => 0.50,

	      timex2_fa_wgt => 1.00,
	  },

     DEFAULT => {entity_err_wgt => {TYPE => 0.50, SUBTYPE => 0.90, CLASS => 0.75},
		 entity_fa_wgt => 0.75,
		 entity_mention_err_wgt => {TYPE => 0.90, ROLE => 0.90, STYLE => 0.90},
		 entity_mention_fa_wgt => 0.75,
		 entity_mention_ref_fa_wgt => 0.00,

		 relation_err_wgt => {TYPE => 0.50, SUBTYPE => 0.90},
		 relation_fa_wgt => 0.75,
		 relation_argument_threshold => 0.00,

		 event_err_wgt => {TYPE => 0.50, MODALITY => 0.75},
		 event_fa_wgt => 0.75,
		 event_participant_role_err_wgt => 0.50,
		 event_participant_fa_wgt => 0.75,
		 event_participant_threshold => 0.25,

		 timex2_fa_wgt => 0.75,
	     },

     EASY => {entity_err_wgt => {TYPE => 0.75, SUBTYPE => 1.00, CLASS => 1.00},
	      entity_fa_wgt => 0.50,
	      entity_mention_err_wgt => {TYPE => 1.00, ROLE => 1.00, STYLE => 1.00},
	      entity_mention_fa_wgt => 0.50,
	      entity_mention_ref_fa_wgt => 0.00,

	      relation_err_wgt => {TYPE => 0.75, SUBTYPE => 1.00},
	      relation_fa_wgt => 0.50,
	      relation_argument_threshold => 0.00,

	      event_err_wgt => {TYPE => 0.75, MODALITY => 1.00},
	      event_fa_wgt => 0.50,
	      event_participant_role_err_wgt => 0.75,
	      event_participant_fa_wgt => 0.50,
	      event_participant_threshold => 0.00,

	      timex2_fa_wgt => 0.50,
	  },

     MAXSCORE => {entity_err_wgt => {TYPE => 1.00, SUBTYPE => 1.00, CLASS => 1.00},
		  entity_fa_wgt => 0.00,
		  entity_mention_err_wgt => {TYPE => 1.00, ROLE => 1.00, STYLE => 1.00},
		  entity_mention_fa_wgt => 0.00,
		  entity_mention_ref_fa_wgt => 0.00,

		  relation_err_wgt => {TYPE => 1.00, SUBTYPE => 1.00},
		  relation_fa_wgt => 0.00,
		  relation_argument_threshold => 0.00,

		  event_err_wgt => {TYPE => 1.00, MODALITY => 1.00},
		  event_fa_wgt => 0.00,
		  event_participant_role_err_wgt => 1.00,
		  event_participant_fa_wgt => 0.00,
		  event_participant_threshold => 0.00,

		  timex2_fa_wgt => 0.00,
	       },
     );

#Entity parameters

my %normalize_mention_type =
    (NAM => "NAME",
     NOM => "NOMINAL",
     PRE => "PREMODIFIER",
     PRO => "PRONOUN");

my %mention_type_wgt =
    (NAME        => 1.00,
     NOMINAL     => 0.20,
     PREMODIFIER => 0.20,
     PRONOUN     => 0.04);

my %normalize_entity_type =
    (PER => "PERSON",
     ORG => "ORGANIZATION",
     VEH => "VEHICLE",
     WEA => "WEAPON",
     GPE => "GPE",
     GSP => "GPE",
     LOC => "LOCATION",
     FAC => "FACILITY",
     TMP => "TMP");

my %entity_type_wgt =
    (PERSON       => 1.00,
     ORGANIZATION => 0.50,
     VEHICLE      => 0.50,
     WEAPON       => 0.50,
     GPE          => 0.25,
     LOCATION     => 0.10,
     FACILITY     => 0.05,
     TMP          => 0.05);

my %normalize_entity_class =
    (SPE => "SPECIFIC",
     SPC => "SPECIFIC",
     GEN => "GENERIC",
     NEG => "NEGATIVE",
     UND => "UNDER-SPECIFIED",
     USP => "UNDER-SPECIFIED");

my %entity_class_wgt = 
    (SPECIFIC          => 1.00,
     GENERIC           => 0.00,
     NEGATIVE          => 0.00,
     "UNDER-SPECIFIED" => 0.00);

#Relation parameters

my %relation_type_wgt =
    (PHYS        => 1.00,
     "PER-SOC"   => 1.00,
     "EMP-ORG"   => 1.00,
     ART         => 1.00,
     "OTHER-AFF" => 1.00,
     "GPE-AFF"   => 1.00,
     DISC        => 1.00,
     METONYMY    => 1.00);

#Event parameters

my %normalize_event_type = 
    (BRK => "destroy",
     MAK => "create",
     GIV => "transfer",
     MOV => "move",
     INT => "interact",
     OTH => "other");

my %event_type_wgt = 
    (destroy  => 1.00,
     create   => 1.00,
     transfer => 1.00,
     move     => 1.00,
     interact => 1.00,
     other    => 1.00);

my %normalize_event_modality =
    (Real => "Real",
     NotReal => "NotReal");

my %event_modality_wgt =
    (Real    => 1.00,
     NotReal => 1.00);

#Timex2 parameters

my $timex2_detection_wgt = 0.10;

my %timex2_attribute_wgt =
    (ANCHOR_DIR => 0.25,
     ANCHOR_VAL => 0.50,
     MOD        => 0.10,
     SET        => 0.10,
     VAL        => 1.00);

#Entities are mapped subject to the following constraints:
#  * Each system output entity may map to only one reference entity.
#  * Each reference entity may map to at most one system output entity.
#  * An entity is mapped only if it improves the overall entity score.
#  * The mappings are chosen to maximize the overall entity score.

my $epsilon = 1E-8;
my $required_precision = 1E-12;

my @mention_types = sort {$mention_type_wgt{$b} <=> $mention_type_wgt{$a} ?
			  $mention_type_wgt{$b} <=> $mention_type_wgt{$a} : $a cmp $b} keys %mention_type_wgt;

my @entity_types = sort {$entity_type_wgt{$b} <=> $entity_type_wgt{$a} ?
			 $entity_type_wgt{$b} <=> $entity_type_wgt{$a} : $a cmp $b} keys %entity_type_wgt;

my @entity_classes = sort {$entity_class_wgt{$b} <=> $entity_class_wgt{$a} ?
			   $entity_class_wgt{$b} <=> $entity_class_wgt{$a} : $a cmp $b} keys %entity_class_wgt;

my @relation_types = sort {$relation_type_wgt{$b} <=> $relation_type_wgt{$a} ?
			   $relation_type_wgt{$b} <=> $relation_type_wgt{$a} : $a cmp $b} keys %relation_type_wgt;

my @event_types = sort {$event_type_wgt{$b} <=> $event_type_wgt{$a} ?
			$event_type_wgt{$b} <=> $event_type_wgt{$a} : $a cmp $b} keys %event_type_wgt;

my @event_modalities = sort {$event_modality_wgt{$b} <=> $event_modality_wgt{$a} ?
			     $event_modality_wgt{$b} <=> $event_modality_wgt{$a} : $a cmp $b} keys %event_modality_wgt;

my %entity_subtypes =
    (PERSON       => {"<NONE>" => 1},
     ORGANIZATION => {Government => 1, Commercial => 1, Educational => 1, "Non-Profit" => 1, Other => 1},
     LOCATION     => {Address => 1, Boundary => 1, Celestial => 1, "Water-Body" => 1, "Land-Region-Natural" => 1,
		      "Region-Local" => 1, "Region-Subnational" => 1, "Region-National" => 1,
		      "Region-International" => 1, Other => 1},
     GPE          => {Continent => 1, Nation => 1, "State-or-Province" => 1, "County-or-District" => 1,
		      "Population-Center" => 1, Other => 1},
     FACILITY     => {Building => 1, "Subarea-Building" => 1, "Bounded-Area" => 1, Conduit => 1, Path => 1,
		      Barrier => 1, Plant => 1, Other => 1},
     VEHICLE      => {Land => 1, Air => 1, Water => 1, "Subarea-Vehicle" => 1, Other => 1},
     WEAPON       => {Blunt => 1, Exploding => 1, Sharp => 1, Chemical => 1, Biological => 1, Shooting => 1,
		      Projectile => 1, Nuclear => 1, Other => 1},
     TMP          => {"<NONE>" => 1});

my %relation_subtypes =
    (PHYS        => {Located => 1, Near => 1, "Part-Whole" => 1},
     "PER-SOC"   => {Business => 1, Family => 1, Other => 1},
     "EMP-ORG"   => {"Employ-Executive" => 1, "Employ-Staff" => 1, "Employ-Undetermined" => 1, "Member-of-Group" => 1,
		     Subsidiary => 1, Partner => 1, Other => 1},
     ART         => {"User-or-Owner" => 1, "Inventor-or-Manufacturer" => 1, Other => 1},
     "OTHER-AFF" => {Ethnic => 1, Ideology => 1, Other => 1},
     "GPE-AFF"   => {"Citizen-or-Resident" => 1, "Based-In" => 1, Other => 1},
     DISC        => {"<NONE>" => 1},
     METONYMY    => {"<NONE>" => 1});

my %relation_symmetry =
    ("PHYS"      => {Near => 1},
     "PER-SOC"   => {Business => 1, Family => 1, Other => 1},
     "EMP-ORG"   => {Partner => 1, Other => 1},
     "OTHER-AFF" => {Other => 1});

my @relation_time_attributes = ("TYPE", "VAL", "MOD", "DIR");

#################################
# GLOBAL DATA

use vars qw (%ref_database %tst_database %eval_docs %sys_docs);
use vars qw ($print_entities $print_relations $print_events $print_timex2 $print_all_data $print_err_data);
use vars qw ($input_file $input_doc $input_entity $input_relation $input_event $input_timex2);
use vars qw ($usage);
use vars qw ($best_score);
use vars qw (%entity_type_statistics %entity_subtype_statistics %entity_class_statistics);
use vars qw (%mention_detection_statistics %attribute_confusion_statistics);
use vars qw (%mention_role_statistics %mention_style_statistics);
use vars qw (%name_detection_statistics);
use vars qw (%mapped_entity_value %mapped_entity_document_value);
use vars qw (%mapped_relation_value %mapped_relation_document_value);
use vars qw (%mapped_event_value %mapped_event_document_value);
use vars qw (%mapped_timex2_value %mapped_timex2_document_value);
use vars qw (%relation_type_statistics %relation_subtype_statistics %relation_class_statistics);
use vars qw (%source_types $source_type $data_type);
use vars qw ($total_words);
use vars qw ($t_start_mapping);
use vars qw (@entity_attributes);
use vars qw (@error_types @xdoc_types @entity_count_types);
use vars qw (@entity_value_types @entity_classes @entity_style_types);

my @origins = ("DATABASE", "CORPUS");

@error_types = ("correct", "miss", "fa", "error");
@entity_attributes = ("ID", "TYPE", "SUBTYPE", "CLASS", "ORIGIN", "COUNTRY", "CONTINENT", "NATIONALITY");
@xdoc_types = ("1", ">1");
@entity_value_types = ("<0.1", "0.1-0.3", "0.3-1.0", "1-3", "3-10", ">10");
@entity_count_types = ("1", "2", "3-4", "5-8", ">8");
@entity_style_types = ("LITERAL", "METONYMIC");

use vars qw (@relation_attributes);
use vars qw (@relation_class_types @relation_count_types @relation_argument_errors);
@relation_attributes = ("ID", "TYPE", "SUBTYPE", "CLASS", "ORIGIN");
@relation_count_types = ("1", ">1");
@relation_class_types = ("EXPLICIT", "IMPLICIT");
@relation_argument_errors = ("0", "1", ">1");

my %normalize_relation_type;
foreach my $type (@relation_types) {
    $normalize_relation_type{$type} = $type;
}
my %normalize_relation_class;
foreach my $class (@relation_class_types) {
    $normalize_relation_class{$class} = $class;
}

use vars qw (@event_attributes);
use vars qw (@event_count_types @event_participant_errors);
@event_attributes = ("ID", "TYPE", "MODALITY", "ORIGIN");
@event_count_types = ("1", ">1");
@event_participant_errors = ("0", "1", ">1");

my %normalize_event_participant_role =
    (Agent => "Agent", Object => "Object", Source => "Source", Target => "Target",
     Location => "Location", Time => "Time", Modifier => "Modifier");
my @event_participant_roles = sort keys %normalize_event_participant_role;

use vars qw (%normalize_event_mention_type %normalize_event_mention_source);
%normalize_event_mention_type =
    (NOM => "nominal", SEN => "clausal");
%normalize_event_mention_source =
    (Event => "Event", Relation => "Relation");

use vars qw (%event_type_statistics %event_modality_statistics %participant_role_statistics);

my $max_string_length = 40;

my ($entity_serial_number, $relation_serial_number, $event_serial_number);
my ($score_bound, $max_delta);
my ($date, $time) = date_time_stamp();
print "ACE evaluation run on $date at $time\n";
print "command line:  ", $0, " ", join(" ", @ARGV), "\n";

$usage = "\n\n$0 [-bghsaxemnid] -r <ref_list> -t <tst_list>\n\n".
    "Description:  This Perl program evaluates ACE system performance.\n".
    "\n".
    "Required arguments:\n".
    "  -r <ref_list> is a file containing a list of ACE reference data for\n".
    "      all documents to be evaluated.  These data must be in standard\n".
    "      ACE apf format\n".
    "  -t <tst_list> is a file containing a list of system ACE output data\n".
    "      for all documents to be evaluated.  These data must be in standard\n".
    "      ACE apf format\n".
    "\n".
    "Optional arguments:\n".
    "  -b <ref_database> is a file containing an ACE database\n".
    "         of entities, relations and events\n".
    "  -g includes GENERIC entities\n".
    "  -h prints this help message to STDOUT\n".
    "  -s prints ACE annotation data summary to STDOUT\n".
    "  -m <parameter_set_name> controls scoring mode by providing a selection of\n".
    "         different parameters (pre)defined in named parameter sets\n".
    "\n".
    "      The following flags control the output of comparison data for diagnosis:\n".
    "  -a outputs report data for all entities, relations and events\n".
    "  -e outputs report data for entities, relations and events with errors\n".
    "\n";

#################################
#MAIN

{
    use vars qw ($opt_b $opt_g $opt_h $opt_s $opt_a $opt_e);
    use vars qw ($opt_r $opt_t $opt_d $opt_m);
    use Getopt::Std;
    getopts ('ghsaedr:t:b:m:');
    die $usage if defined($opt_h);
    $print_entities = defined($opt_s);
    $print_relations = defined($opt_s);
    $print_events = defined($opt_s);
    $print_timex2 = defined($opt_s);
    $print_all_data = defined($opt_a);
    $print_err_data = defined($opt_e);
    defined($opt_r) and defined($opt_t) or die "Error in command line$usage";
    $entity_class_wgt{GENERIC} = $entity_class_wgt{SPECIFIC} if $opt_g;
    select_parameter_set ($opt_m ? $opt_m : "DEFAULT");
    print_parameters ();

#read in the data
    my $t0 = (times)[0];
    get_database ($opt_b) if $opt_b;
    get_data ("REF", \%ref_database, \%eval_docs, $opt_r);
    get_data ("TST", \%tst_database, \%sys_docs, $opt_t);
    check_docs ();

#evaluate entities
    my $t1 = (times)[0];
    compute_entity_values ();
    my $t2 = (times)[0];
    map_entities ();
    my $t3 = (times)[0];
    if ((keys %{$ref_database{entities}})>0) {
	print_entity_mapping (\%ref_database, \%tst_database, "entity mapping")
	    if ((keys %{$tst_database{entities}})>0 and ($print_all_data or $print_err_data));
	score_entity_detection ("entity scoring");
	score_entity_attribute_recognition ();
	score_mention_detection ();
	score_mention_attribute_recognition ();
    }

#evaluate relations
    my $t4 = (times)[0];
    compute_relation_values ();
    my $t5 = (times)[0];
    map_relations ();
    my $t6 = (times)[0];
    if ((keys %{$ref_database{relations}})>0) {
	print_relation_mapping (\%ref_database, \%tst_database, "relation mapping")
	    if ((keys %{$tst_database{relations}})>0 and ($print_all_data or $print_err_data));
	score_relation_detection ("relation scoring");
	score_relation_attribute_recognition ();
    }

#evaluate events
    my $t7 = (times)[0];
    compute_event_values ();
    my $t8 = (times)[0];
    map_events ();
    my $t9 = (times)[0];
    if ((keys %{$ref_database{events}})>0) {
	print_event_mapping (\%ref_database, \%tst_database, "event mapping")
	    if ((keys %{$tst_database{events}})>0 and ($print_all_data or $print_err_data));
	score_event_detection ("event scoring");
	score_event_attribute_recognition ();
    }

#evaluate entity mentions without regard to entity affiliation
    my $t10 = (times)[0];
    $ref_database{entities} = $ref_database{mention_entities};
    $tst_database{entities} = $tst_database{mention_entities};
    compute_entity_values ();
    my $t11 = (times)[0];
    map_entities ();
    my $t12 = (times)[0];
    if ((keys %{$ref_database{entities}})>0) {
	print_entity_mapping (\%ref_database, \%tst_database, "entity mention mapping")
	    if ((keys %{$tst_database{entities}})>0 and ($print_all_data or $print_err_data));
	score_entity_detection ("entity mention scoring");
	score_entity_attribute_recognition ();
    }

#evaluate relation mentions without regard to relation affiliation
    my $t13 = (times)[0];
    $ref_database{relations} = $ref_database{mention_relations};
    $tst_database{relations} = $tst_database{mention_relations};
    compute_relation_values ();
    my $t14 = (times)[0];
    map_relations ();
    my $t15 = (times)[0];
    if ((keys %{$ref_database{relations}})>0) {
	print_relation_mapping (\%ref_database, \%tst_database, "relation mention mapping")
	    if ((keys %{$tst_database{relations}})>0 and ($print_all_data or $print_err_data));
	score_relation_detection ("relation mention scoring");
	score_relation_attribute_recognition ();
    }

#evaluate timex2 expressions
    my $t16 = (times)[0];
    compute_timex2_values ();
    my $t17 = (times)[0];
    map_timex2s ();
    my $t18 = (times)[0];
    if ((keys %{$ref_database{timex2s}})>0) {
	print_timex2_mapping (\%ref_database, \%tst_database, "timex2 mapping")
	    if ((keys %{$tst_database{timex2s}})>0 and ($print_all_data or $print_err_data));
	score_timex2_detection ("timex2 scoring");
	score_timex2_attribute_recognition ();
	score_timex2_mention_detection ();
    }
    my $t19 = (times)[0];

    printf "\ndata input:           %8.2f secs to load data\n", $t1-$t0;
    printf "entity eval:          %8.2f secs to compute values,%8.2f secs to map,%8.2f secs to score\n", $t2-$t1, $t3-$t2, $t4-$t3;
    printf "relation eval:        %8.2f secs to compute values,%8.2f secs to map,%8.2f secs to score\n", $t5-$t4, $t6-$t5, $t7-$t6;
    printf "event eval:           %8.2f secs to compute values,%8.2f secs to map,%8.2f secs to score\n", $t8-$t7, $t9-$t8, $t10-$t9;
    printf "entity mention eval:  %8.2f secs to compute values,%8.2f secs to map,%8.2f secs to score\n", $t11-$t10, $t12-$t11, $t13-$t12;
    printf "relation mention eval:%8.2f secs to compute values,%8.2f secs to map,%8.2f secs to score\n", $t14-$t13, $t15-$t14, $t16-$t15;
    printf "timex2 eval:          %8.2f secs to compute values,%8.2f secs to map,%8.2f secs to score\n", $t17-$t16, $t18-$t17, $t19-$t18;
}

#################################

sub get_data {

    my ($label, $db, $docs, $list) = @_;

    open (LIST, $list) or die "\nUnable to open file list '$list'", $usage;
    while ($input_file = <LIST>) {
	chomp $input_file;
	get_document_data ($db, $docs, $input_file);
    }
    close (LIST);
    check_relation_data ($db);
    print_document_data ($label, $docs) if $opt_s;
    print_entity_data ($label, $db) if $print_entities;
    print_relation_data ($label, $db) if $print_relations;
    print_event_data ($label, $db) if $print_events;
    print_timex2_data ($label, $db) if $print_timex2;
}

#################################

sub check_relation_data {

    my ($db) = @_;
    my ($id, $relation, $arg);

    foreach $id (keys %{$db->{relations}}) {
	$relation = $db->{relations}{$id};
	foreach $arg (@{$relation->{arguments}}) {
	    exists $db->{entities}{$arg->{ID}} or die
		"\n\nFATAL INPUT ERROR:  relation '$relation->{ID}' references a non-existent argument".
		    " ('$arg->{ID}')\n\n";
	}
    }
}

#################################

sub select_parameter_set {

    my ($name) = @_;
    my ($p, $type);

    $name = uc $name;
    if (not defined $parameter_set{$name}) {
	print STDERR "\n\nFATAL ERROR:  unknown parameter set name ($name)\n".
	    "    available parameter set names are:\n";
	foreach $name (sort keys %parameter_set) {
	    printf STDERR "        %s\n", $name;
	}
	print STDERR "\n";
	die;
    }

#Entity parameters
    foreach $type (@mention_types) {
	$p = $parameter_set{$name}{mention_type_wgt}{$type};
	$mention_type_wgt{$type} = $p if defined $p;
    }
    foreach $type (@entity_types) {
	$p = $parameter_set{$name}{entity_type_wgt}{$type};
	$entity_type_wgt{$type} = $p if defined $p;
    }
    foreach $type (keys %{$parameter_set{$name}{entity_err_wgt}}) {
	$p = $parameter_set{$name}{entity_err_wgt}{$type};
	$entity_err_wgt{$type} = $p if defined $p;
    }
    foreach $type (keys %{$parameter_set{$name}{entity_mention_err_wgt}}) {
	$p = $parameter_set{$name}{entity_mention_err_wgt}{$type};
	$entity_mention_err_wgt{$type} = $p if defined $p;
    }
    $p = $parameter_set{$name}{entity_fa_wgt};
    $entity_fa_wgt = $p if defined $p;
    $p = $parameter_set{$name}{entity_mention_fa_wgt};
    $entity_mention_fa_wgt = $p if defined $p;
    $p = $parameter_set{$name}{entity_mention_ref_fa_wgt};
    $entity_mention_ref_fa_wgt = $p if defined $p;

#Relation parameters
    foreach $type (keys %{$parameter_set{$name}{relation_type_wgt}}) {
	$p = $parameter_set{$name}{relation_type_wgt}{$type};
	$relation_type_wgt{$type} = $p if defined $p;
    }
    foreach $type (keys %{$parameter_set{$name}{relation_err_wgt}}) {
	$p = $parameter_set{$name}{relation_err_wgt}{$type};
	$relation_err_wgt{$type} = $p if defined $p;
    }
    $p = $parameter_set{$name}{relation_fa_wgt};
    $relation_fa_wgt = $p if defined $p;
    $p = $parameter_set{$name}{relation_argument_threshold};
    $relation_argument_threshold = $p if defined $p;

#Event parameters
    foreach $type (keys %{$parameter_set{$name}{event_type_wgt}}) {
	$p = $parameter_set{$name}{event_type_wgt}{$type};
	$event_type_wgt{$type} = $p if defined $p;
    }
    foreach $type (keys %{$parameter_set{$name}{event_modality_wgt}}) {
	$p = $parameter_set{$name}{event_modality_wgt}{$type};
	$event_modality_wgt{$type} = $p if defined $p;
    }
    foreach $type (keys %{$parameter_set{$name}{event_err_wgt}}) {
	$p = $parameter_set{$name}{event_err_wgt}{$type};
	$event_err_wgt{$type} = $p if defined $p;
    }
    $p = $parameter_set{$name}{event_participant_role_err_wgt};
    $event_participant_role_err_wgt = $p if defined $p;
    $p = $parameter_set{$name}{event_fa_wgt};
    $event_fa_wgt = $p if defined $p;
    $p = $parameter_set{$name}{event_participant_fa_wgt};
    $event_participant_fa_wgt = $p if defined $p;
    $p = $parameter_set{$name}{event_participant_threshold};
    $event_participant_threshold = $p if defined $p;

#Timex2 parameters
    $p = $parameter_set{$name}{timex2_detection_wgt};
    $timex2_detection_wgt = $p if defined $p;
    foreach $type (keys %{$parameter_set{$name}{timex2_attribute_wgt}}) {
	$p = $parameter_set{$name}{timex2_attribute_wgt}{$type};
	$timex2_attribute_wgt{$type} = $p if defined $p;
    }
    $p = $parameter_set{$name}{timex2_fa_wgt};
    $timex2_fa_wgt = $p if defined $p;
}

#################################

sub print_parameters {

    printf "PARAMETERS:\n".
	"  min mutual fractional overlap of matching mention heads or names:\n".
	"%11.3f\n".
	"  max allowable extent difference for names and mentions to match:\n".
	"%11d chars for text sources\n".
	"%11.3f sec for audio sources\n".
	"%11.3f cm for image sources\n",
	    $min_overlap, $max_diff_chars, $max_diff_time, $max_diff_xy;

#Entity parameters
    print "\n";

    print "  Entity mention values:\n";
    foreach my $type (@mention_types) {
	printf "%11.3f for type %s\n", $mention_type_wgt{$type}, $type;
    }

    print "  Entity value weights for entity types:\n";
    foreach my $type (@entity_types) {
	printf "%11.3f for type %s\n", $entity_type_wgt{$type}, $type;
    }

    print "  Entity value weights for entity classes:\n";
    foreach my $class (@entity_classes) {
	printf "%11.3f for class %s\n", $entity_class_wgt{$class}, $class;
    }

    print "  Entity value discounts for entity attribute recognition errors:\n";
    foreach my $type (sort keys %entity_err_wgt) {
	printf "%11.3f for $type errors\n", $entity_err_wgt{$type};
    }

    print "  Entity mention value discounts for mention attribute recognition errors:\n";
    foreach my $type (sort keys %entity_mention_err_wgt) {
	printf "%11.3f for $type errors\n", $entity_mention_err_wgt{$type};
    }

    printf "  Entity value (cost) weight for spurious (false alarm) entities:%6.3f\n", $entity_fa_wgt;
    printf "  Entity mention value (cost) weight for spurious entity mentions:%6.3f\n", $entity_mention_fa_wgt;
    printf "  Entity mention value (cost) discount for incorrect coreference:%6.3f\n", $entity_mention_ref_fa_wgt;

#Relation parameters
    print "\n";

    print "  Relation value weights for relation types:\n";
    foreach my $type (@relation_types) {
	printf "%11.3f for type %s\n", $relation_type_wgt{$type}, $type;
    }

    print "  Relation value discounts for relation attribute recognition errors:\n";
    foreach my $type (sort keys %relation_err_wgt) {
	printf "%11.3f for $type errors\n", $relation_err_wgt{$type};
    }

    printf "  Relation value (cost) weight for spurious (false alarm) relations:%6.3f\n", $relation_fa_wgt;
    printf "  Minimum fractional value for candidate relation arguments:%6.3f\n", $relation_argument_threshold;

#Event parameters
    print "\n";

    print "  Event value weights for event types:\n";
    foreach my $type (@event_types) {
	printf "%11.3f for type %s\n", $event_type_wgt{$type}, $type;
    }

    print "  Event value weights for event modalities:\n";
    foreach my $type (@event_modalities) {
	printf "%11.3f for modality %s\n", $event_modality_wgt{$type}, $type;
    }

    print "  Event value discounts for event attribute recognition errors:\n";
    foreach my $type (sort keys %event_err_wgt) {
	printf "%11.3f for $type errors\n", $event_err_wgt{$type};
    }

    printf "  Event value (cost) weight for spurious (false alarm) events:%6.3f\n", $event_fa_wgt;
    printf "  Event value (cost) weight for spurious event participants:%6.3f\n", $event_participant_fa_wgt;
    printf "  Event participant value (cost) discount for participant role errors:%6.3f\n", $event_participant_role_err_wgt;
    printf "  Minimum fracional value for candidate event participants:%6.3f\n", $event_participant_threshold;

#Timex2 parameters
    print "\n";

    print "  Timex2 attribute value weights for timex2 attributes:\n";
    printf "%11.3f for timex2 detection\n", $timex2_detection_wgt;
    foreach my $type (sort keys %timex2_attribute_wgt) {
	printf "%11.3f for type %s\n", $timex2_attribute_wgt{$type}, $type;
    }

    printf "  Timex2 value (cost) weight for spurious (false alarm) timex2's:%6.3f\n", $timex2_fa_wgt;

    print "\n";
}

#################################

sub check_docs {

    my ($doc_id, $eval_doc, $sys_doc);

    foreach $doc_id (keys %eval_docs) {
	$eval_doc = $eval_docs{$doc_id};
	$sys_doc = $sys_docs{$doc_id};
	$sys_doc or warn
	    "\n\nWARNING:  ref doc '$doc_id' has no corresponding tst doc\n\n";
	next unless $sys_doc;
	$sys_doc->{TYPE} eq $eval_doc->{TYPE} or die
	    "\n\nFATAL ERROR:  different reference and system output data types for document '$doc_id'\n".
		"                      ref type is '$eval_doc->{TYPE}' but system output type is '$sys_doc->{TYPE}'\n\n";
    }
}

#################################

sub score_entity_detection {

    print "\n======== $_[0] ========\n";
    edt_eval_1_condition ("entity", "type", "ENTITY TYPE", \@entity_types);
    edt_eval_1_condition ("entity", "value", "ENTITY VALUE", \@entity_value_types);
    edt_eval_1_condition ("mention", "count", "COUNT TYPE", \@entity_count_types);
    edt_eval_1_condition ("class", "type", "ENTITY CLASS", \@entity_classes);
    my @types = sort keys %source_types;
    edt_eval_1_condition ("source", "type", "SOURCE TYPE", \@types);
#   edt_eval_2_conditions ("Ndoc", "type", "CROSS-DOC TYPE", \@xdoc_types,
#                          "entities of type", "ENTITY TYPE", \@entity_types);
#   edt_eval_2_conditions ("entity", "type", "ENTITY TYPE", \@entity_types,
#                          "entities of origin", "ENTITY ORIGIN", \@origins);
#   link_eval_2_conditions ("entity", "type", "ENTITY TYPE", \@entity_types,
#                           "entities of origin", "ENTITY ORIGIN", \@origins);
}

#################################

sub edt_eval_1_condition {

    my ($label1, $label2, $condition, $types) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Ent   Detection  Type     Detection  Type    Unweighted        Detection  Type  Type   Value    Value-based       Max      Detection   Type   Type";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_EDT_statistics ($condition);
    print "\nEntity Detection and Type Recognition statistics for \"$source_type\" sources:\n";
    printf " ref      %s\n %-8s %s\n %-8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
    foreach $type (@$types, "total") {
	print_eval ($type, $count, $cost, $nrm_cost, $nrm_cost->{total}, $entity_fa_wgt);
    }
}

#################################

sub print_eval {

    my ($type, $count, $cost, $ref_value, $total_value, $fa_wgt) = @_;
    my $format = "%7.7s%6d%6d%6d%6d%8.1f%6.1f%6.1f%7.1f%5.1f%5.1f%8.1f%6.1f%6.1f%6.1f%8.1f%7.1f%5.1f%5.1f%9.2f%7.2f%7.2f%7.2f%7.2f\n";

    $count = $count->{$type};
    $cost = $cost->{$type};
    $ref_value = $ref_value->{$type};
    my $nref = $count->{correct}+$count->{error}+$count->{miss};
    my $nsys = $count->{correct}+$count->{error}+$count->{fa};
    my $pn = 100/max(1E-30, $nref);
    my $cn = 100/max(1E-30, $ref_value);

    my $recall = ($count->{correct}+0.5*$count->{error})/max($nref,1E-30);
    my $precision = ($count->{correct}+0.5*$count->{error})/max($nsys,1E-30);
    my $fmeasure = 2*$precision*$recall/max($precision+$recall, 1E-30);

    my $value_correct = $ref_value-$cost->{miss}-$cost->{error}-$cost->{correct};
    my $value_recall = ($value_correct+0.5*($cost->{error}+$cost->{correct}))/max(1E-30, $ref_value);
    my $sys_value = $ref_value-$cost->{miss}+$cost->{fa}/max($fa_wgt,1E-30);
    my $value_precision = ($value_correct+0.5*($cost->{error}+$cost->{correct}))/max(1E-30, $sys_value);
    my $value_fmeasure = 2*$value_precision*$value_recall/max($value_precision+$value_recall, 1E-30);

    my $un = 100/max($total_value,1E-30);
    printf $format, $type, $nref, $count->{fa}, $count->{miss}, $count->{error},
    min(999.9,$pn*$count->{fa}), $pn*$count->{miss}, $pn*$count->{error}, 100*$precision, 100*$recall, 100*$fmeasure,
    min(999.9,$cn*$cost->{fa}), $cn*$cost->{miss}, $cn*$cost->{error}, $cn*$cost->{correct},
    max(-999.9,$cn*($value_correct-$cost->{fa})), 100*$value_precision, 100*$value_recall, 100*$value_fmeasure,
    $un*$ref_value, min(999.99,$un*$cost->{fa}), $un*$cost->{miss}, $un*$cost->{error}, $un*$cost->{correct};
}

#################################

sub edt_eval_2_conditions {

    my ($label1, $label2, $condition1, $types1, $label3, $condition2, $types2) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Ent   Detection  Type     Detection  Type    Unweighted        Detection  Type  Type   Value    Value-based       Max      Detection   Type   Type";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_EDT_statistics ($condition1, $condition2);
    foreach my $cond ("ALL", @$types2) {
	print "\nEntity Detection and Type Recognition statistics for \"$source_type\" sources and $label3 \"$cond\":\n";
	printf " ref      %s\n %-8s %s\n %-8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
	foreach $type (@$types1, "total") {
	    print_eval ($type, $count->{$cond}, $cost->{$cond}, $nrm_cost->{$cond}, $nrm_cost->{ALL}{total}, $entity_fa_wgt);
	}
    }
}

#################################

sub link_eval_2_conditions {

    my ($label1, $label2, $condition1, $types1, $label3, $condition2, $types2) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Ent   Detection  Link     Detection  Link    Unweighted        Detection  Link  Link   Value    Value-based       Max      Detection   Link   Link";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_entity_link_statistics ($condition1, $condition2);
    foreach my $cond ("ALL", @$types2) {
	print "\nEntity Detection and Database Linking statistics for \"$source_type\" sources and $label3 \"$cond\":\n";
	printf " ref      %s\n %-8s %s\n %-8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
	foreach $type (@$types1, "total") {
	    print_eval ($type, $count->{$cond}, $cost->{$cond}, $nrm_cost->{cond}, $nrm_cost->{ALL}{total}, $entity_fa_wgt);
	}
    }
}

#################################

sub score_relation_detection {

    print "\n======== $_[0] ========\n";
    rdc_eval_1_condition ("relation", "type", "RELATION TYPE", \@relation_types);
    rdc_eval_1_condition ("class", "type", "RELATION CLASS", \@relation_class_types);
    rdc_eval_1_condition ("mention", "count", "COUNT TYPE", \@relation_count_types);
    my @types = sort keys %source_types;
    rdc_eval_1_condition ("source", "type", "SOURCE TYPE", \@types);
    rdc_eval_1_condition ("argument", "errors", "ARGUMENT ERRORS", \@relation_argument_errors);
#   rdc_eval_2_conditions ("relation", "type", "RELATION TYPE", \@relation_types,
#			   "argument errors of type", "ARGUMENT ERRORS", \@relation_argument_errors);
}

#################################

sub rdc_eval_1_condition {

    my ($label1, $label2, $condition, $types) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Rel   Detection  ATSt     Detection  ATSt    Unweighted        Detection  ATSt  ATSt   Value    Value-based       Max      Detection   ATSt   ATSt";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_RDC_statistics ($condition);
    print "\nRelation Detection and Type Recognition statistics for \"$source_type\" sources:\n";
    printf " ref      %s\n %-8s %s\n %-8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
    foreach $type (@$types, "total") {
	print_eval ($type, $count, $cost, $nrm_cost, $nrm_cost->{total}, $relation_fa_wgt);
    }
}

#################################

sub rdc_eval_2_conditions {

    my ($label1, $label2, $condition1, $types1, $label3, $condition2, $types2) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Rel   Detection  ATSt     Detection  ATSt    Unweighted        Detection  ATSt  ATSt   Value    Value-based       Max      Detection   ATSt   ATSt";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_RDC_statistics ($condition1, $condition2);
    foreach my $cond ("ALL", @$types2) {
	print "\nRelation Detection and Type Recognition statistics for \"$source_type\" sources and $label3 \"$cond\":\n";
	printf " ref      %s\n %-8s %s\n %-8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
	foreach $type (@$types1, "total") {
	    print_eval ($type, $count->{$cond}, $cost->{$cond}, $nrm_cost->{$cond}, $nrm_cost->{ALL}{total}, $relation_fa_wgt);
	}
    }
}

#################################

sub score_event_detection {

    print "\n======== $_[0] ========\n";
    edc_eval_1_condition ("event", "type", "EVENT TYPE", \@event_types);
    edc_eval_1_condition ("modality", "type", "EVENT MODALITY", \@event_modalities);
    edc_eval_1_condition ("mention", "count", "COUNT TYPE", \@event_count_types);
    my @types = sort keys %source_types;
    edc_eval_1_condition ("source", "type", "SOURCE TYPE", \@types);
    edc_eval_1_condition ("participant", "errors", "PARTICIPANT ERRORS", \@event_participant_errors);
#   edc_eval_2_conditions ("event", "type", "EVENT TYPE", \@event_types,
#			   "participant errors of type", "PARTICIPANT ERRORS", \@event_participant_errors);
}

#################################

sub edc_eval_1_condition {

    my ($label1, $label2, $condition, $types) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Evt   Detection   PTM     Detection   PTM    Unweighted        Detection   PTM   PTM   Value    Value-based       Max      Detection    PTM    PTM";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_EDC_statistics ($condition);
    print "\nEvent Detection and Type Recognition statistics for \"$source_type\" sources:\n";
    printf " ref      %s\n %-8.8s %s\n %-8.8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
    foreach $type (@$types, "total") {
	print_eval ($type, $count, $cost, $nrm_cost, $nrm_cost->{total}, $event_fa_wgt);
    }
}

#################################

sub edc_eval_2_conditions {

    my ($label1, $label2, $condition1, $types1, $label3, $condition2, $types2) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Evt   Detection   PTM     Detection   PTM    Unweighted        Detection   PTM   PTM   Value    Value-based       Max      Detection    PTM    PTM";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_EDC_statistics ($condition1, $condition2);
    foreach my $cond ("ALL", @$types2) {
	print "\nEvent Detection and Type Recognition statistics for \"$source_type\" sources and $label3 \"$cond\":\n";
	printf " ref      %s\n %-8.8s %s\n %-8.8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
	foreach $type (@$types1, "total") {
	    print_eval ($type, $count->{$cond}, $cost->{$cond}, $nrm_cost->{$cond}, $nrm_cost->{ALL}{total}, $event_fa_wgt);
	}
    }
}

#################################

sub score_timex2_detection {

    print "\n======== $_[0] ========\n";
    my @types = sort keys %source_types;
    timex2_eval_1_condition ("source", "type", "SOURCE TYPE", \@types);
}

#################################

sub timex2_eval_1_condition {

    my ($label1, $label2, $condition, $types) = @_;
    my ($type, $count, $cost, $nrm_cost);
    my $hdr1 = "________Count________     __________Count_(%)__________        _________________Cost_(%)___________________       _____Unconditioned_Cost_(%)_____";
    my $hdr2 = "Tmx   Detection   Atr     Detection   Atr    Unweighted        Detection   Atr   Atr   Value    Value-based       Max      Detection    Atr    Atr";
    my $hdr3 = "Tot    FA  Miss   Err      FA  Miss   Err    Pre--Rec--F        FA  Miss   Err  Corr     (%)    Pre--Rec--F      Value     FA   Miss    Err   Corr";

    ($count, $cost, $nrm_cost) = conditional_TIMEX2_statistics ($condition);
    print "\nTimex2 Detection and Type Recognition statistics for \"$source_type\" sources:\n";
    printf " ref      %s\n %-8.8s %s\n %-8.8s %s\n", $hdr1, $label1, $hdr2, $label2, $hdr3;
    foreach $type (@$types, "total") {
	print_eval ($type, $count, $cost, $nrm_cost, $nrm_cost->{total}, $timex2_fa_wgt);
    }
}

#################################

sub score_timex2_attribute_recognition {

    my ($attribute, $ref_values, $ref_value, $tst_values, $tst_value);
    my $maxdisplay = 8;

#display dominant confusion statistics for each timex2 attribute
    foreach $attribute (sort keys %attribute_confusion_statistics) {
	my (%ref_count, %tst_count, %sort_count, @display_values, $ndisplay);
	my ($count, $ntot, $nref, $ncor, $nfa, $nmiss);
	$ntot = $nref = $ncor = $nfa = $nmiss = 0;
	$ref_values = $attribute_confusion_statistics{$attribute};

#select attribute values that contribute the most confusions
	foreach $ref_value (keys %$ref_values) {
	    $tst_values = $ref_values->{$ref_value};
	    foreach $tst_value (keys %$tst_values) {
		$count = $tst_values->{$tst_value};
		$ref_count{$ref_value} += $count;
		$tst_count{$tst_value} += $count;
		$ntot += $count;
		$nref += $count unless $ref_value eq "<undef>";
		if ($tst_value eq $ref_value) {
		    $sort_count{$ref_value} += $epsilon*$count;
		    $ncor += $count unless $ref_value eq "<undef>";
		}
		else {
		    $sort_count{$tst_value} += $count;
		    $sort_count{$ref_value} += $count;
		    $nfa += $count if $ref_value eq "<undef>";
		    $nmiss += $count if $tst_value eq "<undef>";
		}
	    }
	}
	@display_values = sort {$sort_count{$b} <=> $sort_count{$a}} keys %sort_count;
	$ndisplay = min($maxdisplay, scalar @display_values);
	splice (@display_values, $ndisplay);

#tabulate confusion statistics for "other" attribute values
	my $others = "all others";
	foreach my $value (@display_values, $others) {
	    $ref_values->{$value}{$others} = $ref_count{$value};
	    $ref_values->{$others}{$value} = $tst_count{$value};
	}
	$ref_values->{$others}{$others} = $ntot;
	foreach $ref_value (@display_values) {
	    foreach $tst_value (@display_values) {
		$count = $ref_values->{$ref_value}{$tst_value};
		next unless $count;
		$ref_values->{$ref_value}{$others} -= $count;
		$ref_values->{$others}{$tst_value} -= $count;
		$ref_values->{$others}{$others} -= $count;
	    }
	}

#output results
	print "\nTimex2 recognition statistics for attribute $attribute (for mapped timex2's):\n";
	my $nerr = $nfa+$nref-$ncor;
	my $nsub = $nref-$nmiss-$ncor;
	my $nsys = $ncor+$nsub+$nfa;
	$nref = max($nref,$epsilon);
	$nsys = max($nsys,$epsilon);
	my $pfa = $nfa/$nref;
	my $psub = $nsub/$nref;
	my $pmiss = $nmiss/$nref;
	my $perror = $nerr/$nref;
	my $recall = ($ncor+0.5*$nsub)/$nref;
	my $precision = ($ncor+0.5*$nsub)/$nsys;
	my $f = 2*$precision*$recall/max($precision+$recall,$epsilon);
	printf "    Summary (count/percent):  Nref=%d/%.1f%s, Nfa=%d/%.1f%s, Nmiss=%d/%.1f%s, Nsub=%d/%.1f%s, Nerr=%d/%.1f%s"
	    .", P/R/F=%.1f%s/%.1f%s/%.1f%s\n",
	    $nref, 100, "%", $nfa, min(999.9,100*$pfa), "%", $nmiss, 100*$pmiss, "%",
	    $nsub, min(999.9,100*$psub), "%", $nerr, min(999.9,100*$perror), "%",
	    100*$precision, "%", 100*$recall, "%", 100*$f, "%";
	print "    Confusion matrix for major error contributors (count/percent):\n        ref\\tst:";
	push @display_values, $others;
	foreach $tst_value (@display_values) {
	    printf "%11.11s ", $tst_value;
	}
	print "\n";
	foreach $ref_value (@display_values) {
	    printf "  %14.14s", $ref_value;
	    foreach $tst_value (@display_values) {
		$count = $ref_values->{$ref_value}{$tst_value};
		printf "%s", $count ? 
		    (sprintf "%6d/%4.1f%s", $count, min(99.9,100*$count/max($ntot,$epsilon)), "%") :
			"      -     ";
	    }
	print "\n";
	}
    }
}

#################################

sub score_timex2_mention_detection {

    my (%men_count, $type, $men_type, $rol_type, $sty_type, $err_type);
    my ($pn);

#scoring conditioned on mention type
    undef %men_count;
    foreach $err_type (@error_types) {
	my $nent = $mention_detection_statistics{$err_type};
	$men_count    {total}{$err_type} += $nent ? $nent : 0;
    }
    print "\nTimex2 Mention Detection and EXACT Extent Recognition statistics for \"$source_type\" sources (for mapped timex2's):\n",
    "         ____________count______________       ____________percent____________\n",
    "         Detection     Extent_Recognition      Detection     Extent_Recognition\n",
    "         miss    fa     miss   err  corr       miss    fa     miss   err  corr\n";
    foreach $type ("total") {
	$pn = 100/max($epsilon, $men_count{$type}{miss}+$men_count{$type}{error}+$men_count{$type}{correct});
	printf "%5.5s%8d%6d%9d%6d%6d%11.1f%6.1f%9.1f%6.1f%6.1f\n", $type,
	$men_count{$type}{miss}, $men_count{$type}{fa},
	$men_count{$type}{miss}, $men_count{$type}{error}, $men_count{$type}{correct},
	$pn*$men_count{$type}{miss}, min(999.9,$pn*$men_count{$type}{fa}),
	$pn*$men_count{$type}{miss}, $pn*$men_count{$type}{error}, $pn*$men_count{$type}{correct};
    }
}

#################################

sub score_entity_attribute_recognition {

# type attributes
    my ($type, $ref_type);
    my (%entity_type_total);
    foreach $ref_type (@entity_types) {
	foreach $type (@entity_types) {
	    $entity_type_statistics{$ref_type}{$type} = 0 unless defined $entity_type_statistics{$ref_type}{$type};
	    $entity_type_total{$ref_type} += $entity_type_statistics{$ref_type}{$type};
	}
    }
    print "\nEntity Type confusion matrix for \"$source_type\" sources (for mapped entities):\n",
    "               ___________count___________        __________percent__________\n",
    "    ref\\tst:  ";
    foreach $type (@entity_types) {
	printf " %3.3s  ", $type;
    }
    print "   ";
    foreach $type (@entity_types) {
	printf "   %3.3s", $type;
    }
    print "\n";
    foreach $ref_type (@entity_types) {
	printf "  %3.3s       ", $ref_type;
	foreach $type (@entity_types) {
	    printf "%6d", $entity_type_statistics{$ref_type}{$type};
	}
	print "     ";
	foreach $type (@entity_types) {
	    printf "%6.1f", 100*$entity_type_statistics{$ref_type}{$type} /
		max($entity_type_total{$ref_type},1);
	}
	print "\n";
    }

# class attributes
    my ($class, $ref_class);
    my (%entity_class_total);
    foreach $ref_class (@entity_classes) {
	foreach $class (@entity_classes) {
	    $entity_class_statistics{$ref_class}{$class} = 0 unless defined $entity_class_statistics{$ref_class}{$class};
	    $entity_class_total{$ref_class} += $entity_class_statistics{$ref_class}{$class};
	}
    }
    print "\nEntity Class confusion matrix for \"$source_type\" sources (for mapped entities):\n",
    "               __count__        _percent_\n",
    "    ref\\tst:  ";
    foreach $class (@entity_classes) {
	printf " %3.3s  ", $class;
    }
    print "   ";
    foreach $class (@entity_classes) {
	printf "   %3.3s", $class;
    }
    print "\n";
    foreach $ref_class (@entity_classes) {
	printf "%5.5s       ", $ref_class;
	foreach $class (@entity_classes) {
	    printf "%6d", $entity_class_statistics{$ref_class}{$class};
	}
	print "     ";
	foreach $class (@entity_classes) {
	    printf "%6.1f", 100*$entity_class_statistics{$ref_class}{$class} /
		max($entity_class_total{$ref_class},1);
	}
	print "\n";
    }

# name attributes
    foreach $type (@entity_types) {
	foreach my $err (@error_types) {
	    $name_detection_statistics{$type}{$err} = 0 unless defined $name_detection_statistics {$type}{$err};
	}
	$name_detection_statistics{total}{miss} += $name_detection_statistics{$type}{miss};
	$name_detection_statistics{total}{fa} += $name_detection_statistics{$type}{fa};
	$name_detection_statistics{total}{correct} += $name_detection_statistics{$type}{correct};
	$name_detection_statistics{total}{error} += $name_detection_statistics{$type}{error};
    }
    print "\nName Detection and Extent Recognition statistics for \"$source_type\" sources (for mapped entities):\n",
    " ref     ____________count______________       ____________percent____________\n",
    " entity  Detection     Extent_Recognition      Detection     Extent_Recognition\n",
    " type    miss    fa     miss   err  corr       miss    fa     miss   err  corr\n";
    foreach $type (@entity_types, "total") {
	my $total = ($name_detection_statistics{$type}{miss} +
		     $name_detection_statistics{$type}{error} +
		     $name_detection_statistics{$type}{correct});
	printf "%5.5s%8d%6d%9d%6d%6d%11.1f%6.1f%9.1f%6.1f%6.1f\n", $type,
	$name_detection_statistics{$type}{miss},
	$name_detection_statistics{$type}{fa},
	$name_detection_statistics{$type}{miss},
	$name_detection_statistics{$type}{error},
	$name_detection_statistics{$type}{correct},
	100*$name_detection_statistics{$type}{miss}/max($total,1),
	100*$name_detection_statistics{$type}{fa}/max($total,1),
	100*$name_detection_statistics{$type}{miss}/max($total,1),
	100*$name_detection_statistics{$type}{error}/max($total,1),
	100*$name_detection_statistics{$type}{correct}/max($total,1);
    }
}

#################################

sub score_mention_detection {

    my (%men_count, $type, $men_type, $rol_type, $sty_type, $err_type);
    my ($pn);

#scoring conditioned on mention type
    undef %men_count;
    foreach $men_type (@mention_types) {
	foreach $rol_type (@entity_types) {
	    foreach $sty_type (@entity_style_types) {
		foreach $err_type (@error_types) {
		    my $nent = $mention_detection_statistics{$men_type}{$rol_type}{$sty_type}{$err_type};
		    $men_count{$men_type}{$err_type} += $nent ? $nent : 0;
		    $men_count    {total}{$err_type} += $nent ? $nent : 0;
		}
	    }
	}
    }
    print "\nMention Detection and Extent Recognition statistics for \"$source_type\" sources (for mapped entities):\n",
    " ref     ____________count______________       ____________percent____________\n",
    " mention Detection     Extent_Recognition      Detection     Extent_Recognition\n",
    " type    miss    fa     miss   err  corr       miss    fa     miss   err  corr\n";
    foreach $type (@mention_types, "total") {
	$pn = 100/max($epsilon, $men_count{$type}{miss}+$men_count{$type}{error}+$men_count{$type}{correct});
	printf "%5.5s%8d%6d%9d%6d%6d%11.1f%6.1f%9.1f%6.1f%6.1f\n", $type,
	$men_count{$type}{miss}, $men_count{$type}{fa},
	$men_count{$type}{miss}, $men_count{$type}{error}, $men_count{$type}{correct},
	$pn*$men_count{$type}{miss}, min(999.9,$pn*$men_count{$type}{fa}),
	$pn*$men_count{$type}{miss}, $pn*$men_count{$type}{error}, $pn*$men_count{$type}{correct};
    }

#scoring conditioned on mention style
    undef %men_count;
    foreach $men_type (@mention_types) {
	foreach $rol_type (@entity_types) {
	    foreach $sty_type (@entity_style_types) {
		foreach $err_type (@error_types) {
		    my $nent = $mention_detection_statistics{$men_type}{$rol_type}{$sty_type}{$err_type};
		    $men_count{$sty_type}{$err_type} += $nent ? $nent : 0;
		    $men_count    {total}{$err_type} += $nent ? $nent : 0;
		}
	    }
	}
    }
    print "\nMention Detection and Extent Recognition statistics for \"$source_type\" sources (for mapped entities):\n",
    " ref     ____________count______________       ____________percent____________\n",
    " mention Detection     Extent_Recognition      Detection     Extent_Recognition\n",
    " style   miss    fa     miss   err  corr       miss    fa     miss   err  corr\n";
    foreach $type (@entity_style_types, "total") {
	$pn = 100/max($epsilon, $men_count{$type}{miss}+$men_count{$type}{error}+$men_count{$type}{correct});
	printf "%5.5s%8d%6d%9d%6d%6d%11.1f%6.1f%9.1f%6.1f%6.1f\n", $type,
	$men_count{$type}{miss}, $men_count{$type}{fa},
	$men_count{$type}{miss}, $men_count{$type}{error}, $men_count{$type}{correct},
	$pn*$men_count{$type}{miss}, min(999.9,$pn*$men_count{$type}{fa}),
	$pn*$men_count{$type}{miss}, $pn*$men_count{$type}{error}, $pn*$men_count{$type}{correct};
    }

#scoring conditioned on mention role
    undef %men_count;
    foreach $men_type (@mention_types) {
	foreach $rol_type (@entity_types) {
	    foreach $sty_type (@entity_style_types) {
		foreach $err_type (@error_types) {
		    my $nent = $mention_detection_statistics{$men_type}{$rol_type}{$sty_type}{$err_type};
		    $men_count{$rol_type}{$err_type} += $nent ? $nent : 0;
		    $men_count    {total}{$err_type} += $nent ? $nent : 0;
		}
	    }
	}
    }
    print "\nMention Detection and Extent Recognition statistics for \"$source_type\" sources (for mapped entities):\n",
    " ref     ____________count______________       ____________percent____________\n",
    " mention Detection     Extent_Recognition      Detection     Extent_Recognition\n",
    " role    miss    fa     miss   err  corr       miss    fa     miss   err  corr\n";
    foreach $type (@entity_types, "total") {
	$pn = 100/max($epsilon, $men_count{$type}{miss}+$men_count{$type}{error}+$men_count{$type}{correct});
	printf "%5.5s%8d%6d%9d%6d%6d%11.1f%6.1f%9.1f%6.1f%6.1f\n", $type,
	$men_count{$type}{miss}, $men_count{$type}{fa},
	$men_count{$type}{miss}, $men_count{$type}{error}, $men_count{$type}{correct},
	$pn*$men_count{$type}{miss}, min(999.9,$pn*$men_count{$type}{fa}),
	$pn*$men_count{$type}{miss}, $pn*$men_count{$type}{error}, $pn*$men_count{$type}{correct};
    }
}

#################################

sub score_mention_attribute_recognition {

# role attributes
    my ($role, $ref_role, $ent_type);
    my (%mention_role_count, %mention_role_total);

    print "\nMention Role confusion matrix for \"$source_type\" sources (for mapped mentions)\n",
    "  For all mapped mentions:\n",
    "               ___________count___________        __________percent__________\n",
    "    ref\\tst:  ";
    foreach $role (@entity_types) {
	printf " %3.3s  ", $role;
    }
    print "   ";
    foreach $role (@entity_types) {
	printf "   %3.3s", $role;
    }
    print "\n";
    foreach $ref_role (@entity_types) {
	foreach $role (@entity_types) {
	    foreach $ent_type (@entity_types) {
		$mention_role_statistics{$ent_type}{$ref_role}{$role} = 0 unless defined $mention_role_statistics{$ent_type}{$ref_role}{$role};
		$mention_role_count{$ref_role}{$role} += $mention_role_statistics{$ent_type}{$ref_role}{$role};
	    }
	    $mention_role_total{$ref_role} += $mention_role_count{$ref_role}{$role};
	}
	printf "%5.5s       ", $ref_role;
	foreach $role (@entity_types) {
	    printf "%6d", $mention_role_count{$ref_role}{$role};
	}
	print "     ";
	foreach $role (@entity_types) {
	    printf "%6.1f", 100*$mention_role_count{$ref_role}{$role} /
		max($mention_role_total{$ref_role},1);
	}
	print "\n";
    }

    foreach $ent_type (@entity_types) {
	print "  For mapped mentions whose entity is of type \"$ent_type\":\n",
	"               ___________count___________        __________percent__________\n",
	"    ref\\tst:  ";
	foreach $role (@entity_types) {
	    printf " %3.3s  ", $role;
	}
	print "   ";
	foreach $role (@entity_types) {
	    printf "   %3.3s", $role;
	}
	print "\n";
	foreach $ref_role (@entity_types) {
	    $mention_role_total{$ref_role} = 0;
	    foreach $role (@entity_types) {
		$mention_role_total{$ref_role} += $mention_role_statistics{$ent_type}{$ref_role}{$role};
	    }
	    printf "%5.5s       ", $ref_role;
	    foreach $role (@entity_types) {
		printf "%6d", $mention_role_statistics{$ent_type}{$ref_role}{$role};
	    }
	    print "     ";
	    foreach $role (@entity_types) {
		printf "%6.1f", 100*$mention_role_statistics{$ent_type}{$ref_role}{$role} /
		    max($mention_role_total{$ref_role},1);
	    }
	    print "\n";
	}
    }

# style attributes
    my ($style, $ref_style);
    my (%mention_style_total);
    foreach $ref_style (@entity_style_types) {
	foreach $style (@entity_style_types) {
	    $mention_style_statistics{$ref_style}{$style} = 0 unless defined $mention_style_statistics{$ref_style}{$style};
	    $mention_style_total{$ref_style} += $mention_style_statistics{$ref_style}{$style};
	}
    }
    print "\nMention Style confusion matrix for \"$source_type\" sources (for mapped mentions):\n",
    "               __count__        _percent_\n",
    "    ref\\tst:  ";
    foreach $style (@entity_style_types) {
	printf " %3.3s  ", $style;
    }
    print "   ";
    foreach $style (@entity_style_types) {
	printf "   %3.3s", $style;
    }
    print "\n";
    foreach $ref_style (@entity_style_types) {
	printf "%5.5s       ", $ref_style;
	foreach $style (@entity_style_types) {
	    printf "%6d", $mention_style_statistics{$ref_style}{$style};
	}
	print "     ";
	foreach $style (@entity_style_types) {
	    printf "%6.1f", 100*$mention_style_statistics{$ref_style}{$style} /
		max($mention_style_total{$ref_style},1);
	}
	print "\n";
    }
}

#################################

sub score_relation_attribute_recognition {

# type attributes
    my ($type, $ref_type);
    my (%relation_type_total);
    foreach $ref_type (@relation_types) {
	foreach $type (@relation_types) {
	    $relation_type_statistics{$ref_type}{$type} = 0 unless defined $relation_type_statistics{$ref_type}{$type};
	    $relation_type_total{$ref_type} += $relation_type_statistics{$ref_type}{$type};
	}
    }
    print "\nRelation Type confusion matrix for \"$source_type\" sources (for mapped relations):\n"
	."             COUNT", "." x (6*(@relation_types-1)), "    PERCENT", "." x (6*(@relation_types-1)), "\n    ref\\tst:";
    foreach $type (@relation_types) {
	printf " %5.5s", $type;
    }
    print "     ";
    foreach $type (@relation_types) {
	printf " %5.5s", $type;
    }
    print "\n";
    foreach $ref_type (@relation_types) {
	printf "%11.11s ", $ref_type;
	foreach $type (@relation_types) {
	    printf "%6d", $relation_type_statistics{$ref_type}{$type};
	}
	print "     ";
	foreach $type (@relation_types) {
	    printf "%6.1f", 100*$relation_type_statistics{$ref_type}{$type} /
		max($relation_type_total{$ref_type},1);
	}
	print "\n";
    }

# subtype attributes
    my ($subtype);
    foreach $ref_type (@relation_types) {
	my (@relation_subtypes, %relation_subtype_total);
	@relation_subtypes = sort keys %{$relation_subtypes{$ref_type}};
	foreach $subtype (@relation_subtypes) {
	    foreach $type (@relation_subtypes) {
		$relation_subtype_statistics{$ref_type}{$subtype}{$type} = 0 unless defined $relation_subtype_statistics{$ref_type}{$subtype}{$type};
		$relation_subtype_total{$subtype} += $relation_subtype_statistics{$ref_type}{$subtype}{$type};
	    }
	}
	printf "\nRelation Subtype confusion matrix for \"$source_type\" sources (for mapped relations):\n"
	    ."  type=%-5.5s", $ref_type;
	print " COUNT", "." x (6*(@relation_subtypes-1)), "    PERCENT", "." x (6*(@relation_subtypes-1)), "\n    ref\\tst: ";
	foreach $subtype (@relation_subtypes) {
	    printf "%5.5s ", $subtype;
	}
	print "     ";
	foreach $subtype (@relation_subtypes) {
	    printf "%5.5s ", $subtype;
	}
	print "\n";
	foreach $subtype (@relation_subtypes) {
	    printf "%11.11s ", $subtype;
	    foreach $type (@relation_subtypes) {
		printf "%6d", $relation_subtype_statistics{$ref_type}{$subtype}{$type};
	    }
	    print "     ";
	    foreach $type (@relation_subtypes) {
		printf "%6.1f", 100*$relation_subtype_statistics{$ref_type}{$subtype}{$type} /
		    max($relation_subtype_total{$subtype},1);
	    }
	    print "\n";
	}
    }

# class attributes
    my ($class, $ref_class);
    my (%relation_class_total);
    foreach $ref_class (@relation_class_types) {
	foreach $class (@relation_class_types) {
	    $relation_class_statistics{$ref_class}{$class} = 0 unless defined $relation_class_statistics{$ref_class}{$class};
	    $relation_class_total{$ref_class} += $relation_class_statistics{$ref_class}{$class};
	}
    }
    print "\nRelation Class confusion matrix for \"$source_type\" sources (for mapped relations):\n"
	."             COUNT", "." x (6*(@relation_class_types-1)), "    PERCENT", "." x (6*(@relation_class_types-1)), "\n    ref\\tst:";
    foreach $class (@relation_class_types) {
	printf " %5.5s", $class;
    }
    print "     ";
    foreach $class (@relation_class_types) {
	printf " %5.5s", $class;
    }
    print "\n";
    foreach $ref_class (@relation_class_types) {
	printf "%11.11s ", $ref_class;
	foreach $class (@relation_class_types) {
	    printf "%6d", $relation_class_statistics{$ref_class}{$class};
	}
	print "     ";
	foreach $class (@relation_class_types) {
	    printf "%6.1f", 100*$relation_class_statistics{$ref_class}{$class} /
		max($relation_class_total{$ref_class},1);
	}
	print "\n";
    }
}

#################################

sub score_event_attribute_recognition {

# type attributes
    my ($type, $ref_type);
    my (%event_type_total);
    foreach $ref_type (@event_types) {
	foreach $type (@event_types) {
	    $event_type_statistics{$ref_type}{$type} = 0 unless defined $event_type_statistics{$ref_type}{$type};
	    $event_type_total{$ref_type} += $event_type_statistics{$ref_type}{$type};
	}
    }
    print "\nEvent Type confusion matrix for \"$source_type\" sources (for mapped events):\n"
	."             COUNT", "." x (6*(@event_types-1)), "    PERCENT", "." x (6*(@event_types-1)), "\n    ref\\tst:";
    foreach $type (@event_types) {
	printf " %5.5s", $type;
    }
    print "     ";
    foreach $type (@event_types) {
	printf " %5.5s", $type;
    }
    print "\n";
    foreach $ref_type (@event_types) {
	printf "%11.11s ", $ref_type;
	foreach $type (@event_types) {
	    printf "%6d", $event_type_statistics{$ref_type}{$type};
	}
	print "     ";
	foreach $type (@event_types) {
	    printf "%6.1f", 100*$event_type_statistics{$ref_type}{$type} /
		max($event_type_total{$ref_type},1);
	}
	print "\n";
    }

# modality attributes
    my ($modality, $ref_modality);
    my (%event_modality_total);
    foreach $ref_modality (@event_modalities) {
	foreach $modality (@event_modalities) {
	    $event_modality_statistics{$ref_modality}{$modality} = 0 unless defined $event_modality_statistics{$ref_modality}{$modality};
	    $event_modality_total{$ref_modality} += $event_modality_statistics{$ref_modality}{$modality};
	}
    }
    print "\nEvent Modality confusion matrix for \"$source_type\" sources (for mapped events):\n"
	."             COUNT", "." x (6*(@event_modalities-1)), "    PERCENT", "." x (6*(@event_modalities-1)), "\n    ref\\tst:";
    foreach $modality (@event_modalities) {
	printf " %5.5s", $modality;
    }
    print "     ";
    foreach $modality (@event_modalities) {
	printf " %5.5s", $modality;
    }
    print "\n";
    foreach $ref_modality (@event_modalities) {
	printf "%11.11s ", $ref_modality;
	foreach $modality (@event_modalities) {
	    printf "%6d", $event_modality_statistics{$ref_modality}{$modality};
	}
	print "     ";
	foreach $modality (@event_modalities) {
	    printf "%6.1f", 100*$event_modality_statistics{$ref_modality}{$modality} /
		max($event_modality_total{$ref_modality},1);
	}
	print "\n";
    }

# participant role attributes
    my ($role, $ref_role);
    my (%participant_role_total);
    my @participant_roles = (@event_participant_roles, "<UNMAPPED>");
    foreach $ref_role (@participant_roles) {
	foreach $role (@participant_roles) {
	    $participant_role_statistics{$ref_role}{$role} = 0 unless defined $participant_role_statistics{$ref_role}{$role};
	    $participant_role_total{$ref_role} += $participant_role_statistics{$ref_role}{$role};
	}
    }
    print "\nEvent Role confusion matrix for \"$source_type\" sources (for mapped events):\n"
	."             COUNT", "." x (6*(@participant_roles-1)), "    PERCENT", "." x (6*(@participant_roles-1)), "\n    ref\\tst:";
    foreach $role (@participant_roles) {
	printf " %5.5s", $role;
    }
    print "     ";
    foreach $role (@participant_roles) {
	printf " %5.5s", $role;
    }
    print "\n";
    foreach $ref_role (@participant_roles) {
	printf "%11.11s ", $ref_role;
	foreach $role (@participant_roles) {
	    printf "%6d", $participant_role_statistics{$ref_role}{$role};
	}
	print "     ";
	foreach $role (@participant_roles) {
	    printf "%6.1f", 100*$participant_role_statistics{$ref_role}{$role} /
		max($participant_role_total{$ref_role},1);
	}
	print "\n";
    }
}

#################################

sub entity_value_type {

    my ($value) = @_;
    my ($value_type);

    foreach my $type (@entity_value_types) {
	my $upper_value = $value_type = $type;
	$upper_value =~ s/.*[^0-9\.]+//;
	return $value_type if $value < $upper_value;
    }
    return $value_type;
}

#################################

sub entity_count_type {

    my ($entity) = @_;
    my ($count, $count_type);

#determine the number of mentions of this entity in the document
    $count = @{$entity->{mentions}};

    foreach my $type (@entity_count_types) {
	my $upper_count = $count_type = $type;
	$upper_count =~ s/.*[^0-9\.]+//;
	return $count_type if $count <= $upper_count;
    }
    return $count_type;
}

#################################

sub relation_count_type {

    my ($relation) = @_;
    my ($count, $count_type);

#determine the number of mentions of this relation in the document
    $count = @{$relation->{mentions}};

    foreach my $type (@relation_count_types) {
	my $upper_count = $count_type = $type;
	$upper_count =~ s/.*[^0-9\.]+//;
	return $count_type if $count <= $upper_count;
    }
    return $count_type;
}

#################################

sub event_count_type {

    my ($event) = @_;
    my ($count, $count_type);

#determine the number of mentions of this relation in the document
    $count = @{$event->{mentions}};

    foreach my $type (@event_count_types) {
	my $upper_count = $count_type = $type;
	$upper_count =~ s/.*[^0-9\.]+//;
	return $count_type if $count <= $upper_count;
    }
    return $count_type;
}

#################################

sub crossdoc_type {

    my ($entity) = @_;
    my ($count, $count_type);

#determine the number of documents in which this entity is mentioned
    $count = scalar keys %{$entity->{documents}};

    foreach my $type (@xdoc_types) {
	my $upper_count = $count_type = $type;
	$upper_count =~ s/.*[^0-9\.]+//;
	return $count_type if $count <= $upper_count;
    }
    return $count_type;
}

#################################

sub conditional_EDT_statistics {

    my ($cond1, $cond2) = @_;

    my ($ref_entity, $tst_entity, $ref_occ, $tst_occ, $ref_id, $tst_id, $doc);
    my ($ref_mentions, $tst_mentions, $ref_mention, $tst_mention, $mention_type);
    my ($ref_names, $tst_names, $ref_name, $tst_name);
    my ($entity_type, $entity_subtype, $entity_value, $count_type, $entity_class);
    my ($src_type, $entity_origin, $xdoc_type);
    my ($role_type, $style_type);
    my ($norm_cost, $cost, $err_type);
    my (%error_count, %cumulative_cost, %normalizing_cost);
    my ($c1_value, $c2_value, $value);

#initialize output statistics
    undef %entity_type_statistics;
    undef %entity_subtype_statistics;
    undef %entity_class_statistics;
    undef %name_detection_statistics;
    undef %mention_role_statistics;
    undef %mention_style_statistics;
    undef %mention_detection_statistics;

    my @c1_types = $cond1 eq "ENTITY TYPE" ? @entity_types :
                   $cond1 eq "ENTITY VALUE" ? @entity_value_types :
                   $cond1 eq "COUNT TYPE" ? @entity_count_types :
                   $cond1 eq "SOURCE TYPE" ? keys %source_types :
                   $cond1 eq "ENTITY ORIGIN" ? @origins :
                   $cond1 eq "ENTITY CLASS" ? @entity_classes :
                   $cond1 eq "CROSS-DOC TYPE" ? @xdoc_types : undef;
    die "\n\nFATAL ERROR in conditional_EDT_statistics:  '$cond1' is not a recognized condition\n\n"
	unless @c1_types;
    if ($cond2) {
        my @c2_types = $cond2 eq "ENTITY TYPE" ? @entity_types :
                       $cond2 eq "ENTITY VALUE" ? @entity_value_types :
                       $cond2 eq "COUNT TYPE" ? @entity_count_types :
                       $cond2 eq "SOURCE TYPE" ? keys %source_types :
                       $cond2 eq "ENTITY ORIGIN" ? @origins :
                       $cond2 eq "ENTITY CLASS" ? @entity_classes :
                       $cond2 eq "CROSS-DOC TYPE" ? @xdoc_types : undef;
	die "\n\nFATAL ERROR in conditional_EDT_statistics:  '$cond2' is not a recognized condition\n\n"
	    unless @c2_types;
	foreach $c2_value (@c2_types, "ALL") {
	    foreach $c1_value (@c1_types, "total") {
		$normalizing_cost{$c2_value}{$c1_value} = 0;
		foreach $err_type (@error_types) {
		    $error_count{$c2_value}{$c1_value}{$err_type} = 0;
		    $cumulative_cost{$c2_value}{$c1_value}{$err_type} = 0;
		}
	    }
	}
    }
    else {
	foreach $c1_value (@c1_types, "total") {
	    $normalizing_cost{$c1_value} = 0;
	    foreach $err_type (@error_types) {
		$error_count{$c1_value}{$err_type} = 0;
		$cumulative_cost{$c1_value}{$err_type} = 0;
	    }
	}
    }

#accumulate recognition statistics over all entity occurrences in all documents
    foreach $ref_id (keys %{$ref_database{entities}}) {
	$ref_entity = $ref_database{entities}{$ref_id};
	$entity_type = $ref_entity->{TYPE};
	$entity_subtype = $ref_entity->{SUBTYPE};
	$entity_class = $ref_entity->{CLASS};
	$entity_origin = $ref_entity->{ORIGIN};
	$xdoc_type = crossdoc_type ($ref_entity);
	foreach $doc (keys %{$ref_entity->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $ref_occ = $ref_entity->{documents}{$doc};
	    $src_type = $ref_occ->{SOURCE};
	    $entity_value = entity_value_type($ref_occ->{VALUE});
	    $count_type = entity_count_type ($ref_occ);
	    if ($tst_occ = $ref_occ->{MAP}) {

#update entity attribute recognition statistics
		$entity_type_statistics{$entity_type}{$tst_occ->{TYPE}}++;
		$entity_subtype_statistics{$entity_subtype}{$tst_occ->{TYPE}}++;
		$entity_class_statistics{$entity_class}{$tst_occ->{CLASS}}++;

#update entity name statistics
		$ref_names = $ref_occ->{names};
		foreach $ref_name (@$ref_names) {
		    $err_type = "miss";
		    $tst_name = $ref_name->{MAP};
		    $err_type = $tst_name ? (extent_mismatch($ref_name->{locator}, $tst_name->{locator}) <= 1 ?
					     "correct" : "error") : "miss";
		    $name_detection_statistics{$entity_type}{$err_type}++;
		}
		$tst_names = $tst_occ->{names};
		foreach $tst_name (@$tst_names) {
		    $name_detection_statistics{$entity_type}{fa}++ unless $tst_name->{MAP};
		}

#update entity mention statistics
		$ref_mentions = $ref_occ->{mentions};
		$tst_mentions = $tst_occ->{mentions};
		foreach $ref_mention (@$ref_mentions) {
		    $mention_type = $ref_mention->{TYPE};
		    $role_type = $ref_mention->{ROLE};
		    $style_type = $ref_mention->{STYLE};
		    $err_type = "miss";
		    if ($ref_mention->{MAP}) {
			$mention_role_statistics{$entity_type}{$role_type}{$ref_mention->{MAP}{ROLE}}++;
			$mention_style_statistics{$style_type}{$ref_mention->{MAP}{STYLE}}++;
			$err_type = extent_mismatch ($ref_mention->{extent}{locator}, $ref_mention->{MAP}->{extent}{locator}) <= 1 ?
			    "correct" : "error";
		    }
		    $mention_detection_statistics{$mention_type}{$role_type}{$style_type}{$err_type}++;
		}
		foreach $tst_mention (@$tst_mentions) {
		    next if $tst_mention->{MAP};
		    $mention_type = $tst_mention->{TYPE};
		    $role_type = $tst_mention->{ROLE};
		    $style_type = $tst_mention->{STYLE};
		    $mention_detection_statistics{$mention_type}{$role_type}{$style_type}{fa}++;
		}
	    }

#update entity detection statistics
	    $cost = $norm_cost = $ref_occ->{VALUE};
	    $cost -= $mapped_entity_document_value{$doc}{$ref_occ->{ID}}{$tst_occ->{ID}} if $tst_occ;
	    $err_type = $tst_occ ? (($entity_type eq $tst_occ->{TYPE} and
				     $entity_subtype eq $tst_occ->{SUBTYPE}) ? "correct" : "error") : "miss";
	    $c1_value = $cond1 eq "ENTITY TYPE" ? $entity_type :
		        $cond1 eq "ENTITY VALUE" ? $entity_value :
			$cond1 eq "COUNT TYPE" ? $count_type :
	                $cond1 eq "SOURCE TYPE" ? $src_type :
			$cond1 eq "ENTITY ORIGIN" ? $entity_origin :
			$cond1 eq "ENTITY CLASS" ? $entity_class :
		        $cond1 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "ENTITY TYPE" ? $entity_type :
		            $cond2 eq "ENTITY VALUE" ? $entity_value :
                            $cond2 eq "COUNT TYPE" ? $count_type :
	                    $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "ENTITY ORIGIN" ? $entity_origin :
                            $cond2 eq "ENTITY CLASS" ? $entity_class :
                            $cond2 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
	        $error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		    
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_cost;
		$normalizing_cost{$c2_value}  {total}   += $norm_cost;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_cost;
		$normalizing_cost   {ALL}     {total}   += $norm_cost;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total}  {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_cost;
		$normalizing_cost  {total}   += $norm_cost;
	    }
	}
    }

#update entity false alarm statistics
    foreach $tst_id (keys %{$tst_database{entities}}) {
	$tst_entity = $tst_database{entities}{$tst_id};
	$entity_type = $tst_entity->{TYPE};
	$entity_class = $tst_entity->{CLASS};
	$entity_origin = $tst_entity->{ORIGIN};
	$xdoc_type = crossdoc_type ($tst_entity);
	foreach $doc (keys %{$tst_entity->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $tst_occ = $tst_entity->{documents}{$doc};
	    next if $tst_occ->{MAP};
	    $src_type = $tst_occ->{SOURCE};
	    $entity_value = entity_value_type($tst_occ->{VALUE});
	    $count_type = entity_count_type ($tst_occ);
	    $norm_cost = 0;
	    $cost = -$tst_occ->{FA_VALUE};
	    $err_type = "fa";
	    $c1_value = $cond1 eq "ENTITY TYPE" ? $entity_type :
		        $cond1 eq "ENTITY VALUE" ? $entity_value :
			$cond1 eq "COUNT TYPE" ? $count_type :
	                $cond1 eq "SOURCE TYPE" ? $src_type :
			$cond1 eq "ENTITY ORIGIN" ? $entity_origin :
			$cond1 eq "ENTITY CLASS" ? $entity_class :
		        $cond1 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "ENTITY TYPE" ? $entity_type :
		            $cond2 eq "ENTITY VALUE" ? $entity_value :
                            $cond2 eq "COUNT TYPE" ? $count_type :
	                    $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "ENTITY ORIGIN" ? $entity_origin :
                            $cond2 eq "ENTITY CLASS" ? $entity_class :
                            $cond2 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_cost;
		$normalizing_cost{$c2_value}  {total}   += $norm_cost;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_cost;
		$normalizing_cost   {ALL}     {total}   += $norm_cost;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total}  {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_cost;
		$normalizing_cost  {total}   += $norm_cost;
	    }
	}
    }
    return ({%error_count}, {%cumulative_cost}, {%normalizing_cost});
}

#################################

sub conditional_entity_link_statistics {

    my ($cond1, $cond2) = @_;

    my ($ref_entity, $tst_entity, $ref_occ, $tst_occ, $ref_id, $tst_id, $doc);
    my ($entity_type, $entity_value, $count_type, $entity_class);
    my ($src_type, $entity_origin, $xdoc_type);
    my ($norm_cost, $cost, $err_type);
    my (%error_count, %cumulative_cost, %normalizing_cost);
    my ($c1_value, $c2_value, $value);

#initialize output statistics
    my @c1_types = $cond1 eq "ENTITY TYPE" ? @entity_types :
                   $cond1 eq "ENTITY VALUE" ? @entity_value_types :
                   $cond1 eq "COUNT TYPE" ? @entity_count_types :
                   $cond1 eq "SOURCE TYPE" ? keys %source_types :
                   $cond1 eq "ENTITY ORIGIN" ? @origins :
                   $cond1 eq "ENTITY CLASS" ? @entity_classes :
                   $cond1 eq "CROSS-DOC TYPE" ? @xdoc_types : undef;
    die "\n\nFATAL ERROR in conditional_entity_link_statistics:  '$cond1' is not a recognized condition\n\n"
	unless @c1_types;
    if ($cond2) {
        my @c2_types = $cond2 eq "ENTITY TYPE" ? @entity_types :
                       $cond2 eq "ENTITY VALUE" ? @entity_value_types :
                       $cond2 eq "COUNT TYPE" ? @entity_count_types :
                       $cond2 eq "SOURCE TYPE" ? keys %source_types :
                       $cond2 eq "ENTITY ORIGIN" ? @origins :
                       $cond2 eq "ENTITY CLASS" ? @entity_classes :
                       $cond2 eq "CROSS-DOC TYPE" ? @xdoc_types : undef;
	die "\n\nFATAL ERROR in conditional_entity_link_statistics:  '$cond2' is not a recognized condition\n\n"
	    unless @c2_types;
	foreach $c2_value (@c2_types, "ALL") {
	    foreach $c1_value (@c1_types, "total") {
		$normalizing_cost{$c2_value}{$c1_value} = 0;
		foreach $err_type (@error_types) {
		    $error_count{$c2_value}{$c1_value}{$err_type} = 0;
		    $cumulative_cost{$c2_value}{$c1_value}{$err_type} = 0;
		}
	    }
	}
    }
    else {
	foreach $c1_value (@c1_types, "total") {
	    $normalizing_cost{$c1_value} = 0;
	    foreach $err_type (@error_types) {
		$error_count{$c1_value}{$err_type} = 0;
		$cumulative_cost{$c1_value}{$err_type} = 0;
	    }
	}
    }

#accumulate recognition statistics over all entity occurrences in all documents
    foreach $ref_id (keys %{$ref_database{entities}}) {
	$ref_entity = $ref_database{entities}{$ref_id};
	$entity_type = $ref_entity->{TYPE};
	$entity_class = $ref_entity->{CLASS};
	$entity_origin = $ref_entity->{ORIGIN};
	$tst_entity = $ref_entity->{MAP};
	$xdoc_type = crossdoc_type ($ref_entity);
	foreach $doc (keys %{$ref_entity->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $ref_occ = $ref_entity->{documents}{$doc};
	    $src_type = $ref_occ->{SOURCE};
	    $entity_value = entity_value_type($ref_occ->{VALUE});
	    $count_type = entity_count_type ($ref_occ);

#update entity detection statistics
	    $tst_occ = $ref_occ->{MAP};
	    $cost = $norm_cost = $ref_occ->{VALUE};
	    $cost -= $mapped_entity_document_value{$doc}{$ref_occ->{ID}}{$tst_occ->{ID}} if $tst_occ;
	    $err_type = $tst_occ ? ((($ref_entity->{ORIGIN} eq "DATABASE" and $tst_entity->{ID} eq $ref_id) or
				     ($ref_entity->{ORIGIN} eq "CORPUS" and $tst_entity->{ORIGIN} eq "CORPUS")) ?
				    "correct" : "error") : "miss";
	    $c1_value = $cond1 eq "ENTITY TYPE" ? $entity_type :
		        $cond1 eq "ENTITY VALUE" ? $entity_value :
			$cond1 eq "COUNT TYPE" ? $count_type :
	                $cond1 eq "SOURCE TYPE" ? $src_type :
			$cond1 eq "ENTITY ORIGIN" ? $entity_origin :
			$cond1 eq "ENTITY CLASS" ? $entity_class :
		        $cond1 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "ENTITY TYPE" ? $entity_type :
		            $cond2 eq "ENTITY VALUE" ? $entity_value :
                            $cond2 eq "COUNT TYPE" ? $count_type :
	                    $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "ENTITY ORIGIN" ? $entity_origin :
                            $cond2 eq "ENTITY CLASS" ? $entity_class :
                            $cond2 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
	        $error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		    
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_cost;
		$normalizing_cost{$c2_value}  {total}   += $norm_cost;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_cost;
		$normalizing_cost   {ALL}     {total}   += $norm_cost;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total}  {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_cost;
		$normalizing_cost  {total}   += $norm_cost;
	    }
	}
    }

#update entity false alarm statistics
    foreach $tst_id (keys %{$tst_database{entities}}) {
	$tst_entity = $tst_database{entities}{$tst_id};
	$entity_type = $tst_entity->{TYPE};
	$entity_class = $tst_entity->{CLASS};
	$entity_origin = $tst_entity->{ORIGIN};
	$xdoc_type = crossdoc_type ($tst_entity);
	foreach $doc (keys %{$tst_entity->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $tst_occ = $tst_entity->{documents}{$doc};
	    $src_type = $tst_occ->{SOURCE};
	    next if $tst_occ->{MAP};
	    $entity_value = entity_value_type($tst_occ->{VALUE});
	    $count_type = entity_count_type ($tst_occ);
	    $err_type = "fa";
	    $cost = -$tst_occ->{FA_VALUE};
	    $norm_cost = 0;
	    $c1_value = $cond1 eq "ENTITY TYPE" ? $entity_type :
		        $cond1 eq "ENTITY VALUE" ? $entity_value :
			$cond1 eq "COUNT TYPE" ? $count_type :
	                $cond1 eq "SOURCE TYPE" ? $src_type :
			$cond1 eq "ENTITY ORIGIN" ? $entity_origin :
			$cond1 eq "ENTITY CLASS" ? $entity_class :
		        $cond1 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "ENTITY TYPE" ? $entity_type :
		            $cond2 eq "ENTITY VALUE" ? $entity_value :
                            $cond2 eq "COUNT TYPE" ? $count_type :
	                    $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "ENTITY ORIGIN" ? $entity_origin :
                            $cond2 eq "ENTITY CLASS" ? $entity_class :
                            $cond2 eq "CROSS-DOC TYPE" ? $xdoc_type : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_cost;
		$normalizing_cost{$c2_value}  {total}   += $norm_cost;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_cost;
		$normalizing_cost   {ALL}     {total}   += $norm_cost;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total}  {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_cost;
		$normalizing_cost  {total}   += $norm_cost;
	    }
	}
    }
    return ({%error_count}, {%cumulative_cost}, {%normalizing_cost});
}

#################################

sub conditional_RDC_statistics {

    my ($cond1, $cond2) = @_;

    my ($ref_relation, $tst_relation, $ref_occ, $tst_occ, $ref_id, $tst_id, $doc);
    my ($ref_mentions, $tst_mentions, $ref_mention, $tst_mention);
    my ($relation_type, $relation_subtype, $count_type, $relation_class, $relation_origin);
    my ($src_type, $xdoc_type, $type_err, $subt_err, $time_err);
    my ($narg_errs, $arg_errs);
    my ($norm_value, $cost, $err_type);
    my (%error_count, %cumulative_cost, %normalizing_cost);
    my ($c1_value, $c2_value);

#initialize output statistics
    undef %relation_type_statistics;
    undef %relation_subtype_statistics;
    undef %relation_class_statistics;

    my @c1_types = $cond1 eq "RELATION TYPE" ? @relation_types :
                   $cond1 eq "RELATION CLASS" ? @relation_class_types :
                   $cond1 eq "COUNT TYPE" ? @relation_count_types :
                   $cond1 eq "SOURCE TYPE" ? keys %source_types :
                   $cond1 eq "ARGUMENT ERRORS" ? @relation_argument_errors : undef;
    die "\n\nFATAL ERROR in conditional_RDC_statistics:  '$cond1' is not a recognized condition\n\n"
	unless @c1_types;
    if ($cond2) {
	my @c2_types = $cond2 eq "RELATION TYPE" ? @relation_types :
                       $cond2 eq "RELATION CLASS" ? @relation_class_types :
                       $cond2 eq "COUNT TYPE" ? @relation_count_types :
                       $cond2 eq "SOURCE TYPE" ? keys %source_types :
                       $cond2 eq "ARGUMENT ERRORS" ? @relation_argument_errors : undef;
	die "\n\nFATAL ERROR in conditional_RDC_statistics:  '$cond2' is not a recognized condition\n\n"
	    unless @c2_types;
	foreach $c2_value (@c2_types, "ALL") {
	    foreach $c1_value (@c1_types, "total") {
		$normalizing_cost{$c2_value}{$c1_value} = 0;
		foreach $err_type (@error_types) {
		    $error_count{$c2_value}{$c1_value}{$err_type} = 0;
		    $cumulative_cost{$c2_value}{$c1_value}{$err_type} = 0;
		}
	    }
	}
    }
    else {
	foreach $c1_value (@c1_types, "total") {
	    $normalizing_cost{$c1_value} = 0;
	    foreach $err_type (@error_types) {
		$error_count{$c1_value}{$err_type} = 0;
		$cumulative_cost{$c1_value}{$err_type} = 0;
	    }
	}
    }

#accumulate recognition statistics over all relation occurrences in all documents
    foreach $ref_id (keys %{$ref_database{relations}}) {
	$ref_relation = $ref_database{relations}{$ref_id};
	$relation_type = $ref_relation->{TYPE};
	$relation_subtype = $ref_relation->{SUBTYPE};
	$relation_class = $ref_relation->{CLASS};
	$relation_origin = $ref_relation->{ORIGIN};
	$xdoc_type = crossdoc_type ($ref_relation);
	foreach $doc (keys %{$ref_relation->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $ref_occ = $ref_relation->{documents}{$doc};
	    $src_type = $ref_occ->{SOURCE};
	    $count_type = relation_count_type ($ref_occ);
	    if ($tst_occ=$ref_occ->{MAP}) {

#update relation attribute recognition statistics
		$relation_type_statistics{$relation_type}{$tst_occ->{TYPE}}++;
		$relation_subtype_statistics{$relation_type}{$relation_subtype}{$tst_occ->{SUBTYPE}}++;
		$relation_class_statistics{$relation_class}{$tst_occ->{CLASS}}++;

#update relation detection statistics		
		$cost = $norm_value = $ref_occ->{VALUE};
		$cost -= $mapped_relation_document_value{$doc}{$ref_occ->{ID}}{$tst_occ->{ID}};
		$type_err = $relation_type ne $tst_occ->{TYPE};
		$subt_err = $relation_subtype ne $tst_occ->{SUBTYPE};
		$time_err = (num_relation_time_mismatches ($ref_relation->{times}, $ref_relation->{MAP}{times}) == 0) ? "correct" : "error";
		$narg_errs = num_relation_argument_mapping_errors ($ref_relation, $ref_relation->{MAP});
		$arg_errs = $narg_errs > 1 ? ">1" : $narg_errs;
		$err_type = ($arg_errs or $type_err eq "error" or $subt_err eq "error") ? "error" : "correct";
	    }
	    else {
		$arg_errs = 0;
		$err_type = "miss";
		$cost = $norm_value = $ref_occ->{VALUE};
	    }

	    $c1_value = $cond1 eq "RELATION TYPE" ? $relation_type :
                        $cond1 eq "RELATION CLASS" ? $relation_class :
                        $cond1 eq "COUNT TYPE" ? $count_type :
                        $cond1 eq "SOURCE TYPE" ? $src_type :
                        $cond1 eq "ARGUMENT ERRORS" ? $arg_errs : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "RELATION TYPE" ? $relation_type :
                            $cond2 eq "RELATION CLASS" ? $relation_class :
                            $cond2 eq "COUNT TYPE" ? $count_type :
                            $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "ARGUMENT ERRORS" ? $arg_errs : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_value;
		$normalizing_cost{$c2_value}  {total}   += $norm_value;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_value;
		$normalizing_cost   {ALL}     {total}   += $norm_value;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total} {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total} {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_value;
		$normalizing_cost  {total}  += $norm_value;
	    }
	}
    }

#update relation false alarm statistics
    foreach $tst_id (keys %{$tst_database{relations}}) {
	$tst_relation = $tst_database{relations}{$tst_id};
	$relation_type = $tst_relation->{TYPE};
	$relation_subtype = $tst_relation->{SUBTYPE};
	$relation_class = $tst_relation->{CLASS};
	$relation_origin = $tst_relation->{ORIGIN};
	$xdoc_type = crossdoc_type ($tst_relation);
	foreach $doc (keys %{$tst_relation->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $tst_occ = $tst_relation->{documents}{$doc};
	    next if $tst_occ->{MAP};
	    $src_type = $tst_occ->{SOURCE};
	    $count_type = relation_count_type ($tst_occ);
	    $arg_errs = 0;
	    $norm_value = 0;
	    $cost = $relation_fa_wgt * $tst_occ->{VALUE};
	    $err_type = "fa";

	    $c1_value = $cond1 eq "RELATION TYPE" ? $relation_type :
                        $cond1 eq "RELATION CLASS" ? $relation_class :
                        $cond1 eq "COUNT TYPE" ? $count_type :
                        $cond1 eq "SOURCE TYPE" ? $src_type :
                        $cond1 eq "ARGUMENT ERRORS" ? $arg_errs : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "RELATION TYPE" ? $relation_type :
                            $cond2 eq "RELATION CLASS" ? $relation_class :
                            $cond2 eq "COUNT TYPE" ? $count_type :
                            $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "ARGUMENT ERRORS" ? $arg_errs : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_value;
		$normalizing_cost{$c2_value}  {total}   += $norm_value;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_value;
		$normalizing_cost   {ALL}     {total}   += $norm_value;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total} {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total} {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_value;
		$normalizing_cost  {total}  += $norm_value;
	    }
	}
    }
    return ({%error_count}, {%cumulative_cost}, {%normalizing_cost});
}

#################################

sub num_relation_argument_mapping_errors {

    my ($ref, $tst) = @_;

    my ($score, $arg_order, $narg, $ref_arg, $tst_arg, $nmapped, $num_errors);

    ($score, $arg_order) = relation_document_value ($ref, $tst);

    $nmapped = 0;
    for ($narg=0; $narg<@{$ref->{arguments}}; $narg++) {
	$ref_arg = $ref->{arguments}[$narg];
	$tst_arg = $tst->{arguments}[$arg_order->[$narg]];
	$nmapped++ if ($ref_database{entities}{$ref_arg->{ID}}{MAP} and $tst_arg->{ID} and
		       $ref_database{entities}{$ref_arg->{ID}}{MAP}{ID} eq $tst_arg->{ID});
    }

    $num_errors = @{$ref->{arguments}} + @{$tst->{arguments}} - 2*$nmapped;
    return $num_errors;
}

#################################

sub num_event_participant_mapping_errors {

    my ($ref_event) = @_;

    my ($num_errs, $role, $id, $tst_event, $ref_entity, $ref_participant);
    
    $num_errs = num_event_participants ($ref_event);
    $tst_event = $ref_event->{MAP};
    return $num_errs unless $tst_event;
    $num_errs += num_event_participants ($tst_event);

    foreach $role (sort keys %{$ref_event->{participants}}) {
	foreach $id (sort keys %{$ref_event->{participants}{$role}}) {
	    $ref_entity = $ref_database{entities}{$id};
	    $ref_participant = $ref_event->{participants}{$role}{$id};
	    $num_errs -= 2 if ($ref_participant->{MAP} and
			       $ref_entity->{MAP} and
			       $ref_entity->{MAP}{ID} eq $ref_participant->{MAP}{ID});
	}
    }

    return $num_errs;
}

#################################

sub conditional_EDC_statistics {

    my ($cond1, $cond2) = @_;

    my ($ref_event, $tst_event, $ref_occ, $tst_occ, $ref_id, $tst_id, $doc);
    my ($ref_mentions, $tst_mentions, $ref_mention, $tst_mention);
    my ($event_type, $count_type, $event_modality, $event_origin);
    my ($src_type, $xdoc_type, $type_err, $modality_err);
    my ($nerrs, $participant_errs, $ref_role, $tst_role, $tst_participant);
    my ($norm_value, $cost, $err_type);
    my (%error_count, %cumulative_cost, %normalizing_cost);
    my ($c1_value, $c2_value);

#initialize output statistics
    undef %event_type_statistics;
    undef %event_modality_statistics;
    undef %participant_role_statistics;

    my @c1_types = $cond1 eq "EVENT TYPE" ? @event_types :
                   $cond1 eq "EVENT MODALITY" ? @event_modalities :
                   $cond1 eq "COUNT TYPE" ? @event_count_types :
                   $cond1 eq "SOURCE TYPE" ? keys %source_types :
                   $cond1 eq "PARTICIPANT ERRORS" ? @event_participant_errors : undef;
    die "\n\nFATAL ERROR in conditional_EDC_statistics:  '$cond1' is not a recognized condition\n\n"
	unless @c1_types;
    if ($cond2) {
	my @c2_types = $cond2 eq "EVENT TYPE" ? @event_types :
                       $cond2 eq "EVENT MODALITY" ? @event_modalities :
                       $cond2 eq "COUNT TYPE" ? @event_count_types :
                       $cond2 eq "SOURCE TYPE" ? keys %source_types :
                       $cond2 eq "PARTICIPANT ERRORS" ? @event_participant_errors : undef;
	die "\n\nFATAL ERROR in conditional_EDC_statistics:  '$cond2' is not a recognized condition\n\n"
	    unless @c2_types;
	foreach $c2_value (@c2_types, "ALL") {
	    foreach $c1_value (@c1_types, "total") {
		$normalizing_cost{$c2_value}{$c1_value} = 0;
		foreach $err_type (@error_types) {
		    $error_count{$c2_value}{$c1_value}{$err_type} = 0;
		    $cumulative_cost{$c2_value}{$c1_value}{$err_type} = 0;
		}
	    }
	}
    }
    else {
	foreach $c1_value (@c1_types, "total") {
	    $normalizing_cost{$c1_value} = 0;
	    foreach $err_type (@error_types) {
		$error_count{$c1_value}{$err_type} = 0;
		$cumulative_cost{$c1_value}{$err_type} = 0;
	    }
	}
    }

#accumulate recognition statistics over all event occurrences in all documents
    foreach $ref_id (keys %{$ref_database{events}}) {
	$ref_event = $ref_database{events}{$ref_id};
	$event_type = $ref_event->{TYPE};
	$event_modality = $ref_event->{MODALITY};
	$event_origin = $ref_event->{ORIGIN};
	$xdoc_type = crossdoc_type ($ref_event);
	foreach $doc (keys %{$ref_event->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $ref_occ = $ref_event->{documents}{$doc};
	    $src_type = $ref_occ->{SOURCE};
	    $count_type = event_count_type ($ref_occ);
	    if ($tst_occ=$ref_occ->{MAP}) {

#update event attribute recognition statistics
		$event_type_statistics{$event_type}{$ref_occ->{MAP}{TYPE}}++;
		$event_modality_statistics{$event_modality}{$ref_occ->{MAP}{MODALITY}}++;
		foreach $ref_role (keys %{$ref_occ->{participants}}) {
		    foreach $ref_id (keys %{$ref_occ->{participants}{$ref_role}}) {
			$tst_participant = $ref_occ->{participants}{$ref_role}{$ref_id}->{MAP};
			$tst_role = $tst_participant ? $tst_participant->{ROLE} : "<UNMAPPED>";
			$participant_role_statistics{$ref_role}{$tst_role}++;
		    }
		}
		foreach $tst_role (keys %{$tst_occ->{participants}}) {
		    foreach $tst_id (keys %{$tst_occ->{participants}{$tst_role}}) {
			next if $tst_occ->{participants}{$tst_role}{$tst_id}->{MAP};
			$participant_role_statistics{"<UNMAPPED>"}{$tst_role}++;
		    }
		}

#update event detection statistics
		$cost = $norm_value = $ref_occ->{VALUE};
		$cost -= $mapped_event_document_value{$doc}{$ref_occ->{ID}}{$tst_occ->{ID}};
		$type_err = $event_type ne $tst_occ->{TYPE};
		$modality_err = $event_modality ne $tst_occ->{MODALITY};
		$nerrs = num_event_participant_mapping_errors ($ref_event, $tst_event);
		$participant_errs = $nerrs > 1 ? ">1" : $nerrs;
		$err_type = ($participant_errs or $type_err or $modality_err) ? "error" : "correct";
	    }
	    else {
		$participant_errs = 0;
		$err_type = "miss";
		$cost = $norm_value = $ref_occ->{VALUE};
	    }

	    $c1_value = $cond1 eq "EVENT TYPE" ? $event_type :
                        $cond1 eq "EVENT MODALITY" ? $event_modality :
                        $cond1 eq "COUNT TYPE" ? $count_type :
                        $cond1 eq "SOURCE TYPE" ? $src_type :
                        $cond1 eq "PARTICIPANT ERRORS" ? $participant_errs : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "EVENT TYPE" ? $event_type :
                            $cond2 eq "EVENT MODALITY" ? $event_modality :
                            $cond2 eq "COUNT TYPE" ? $count_type :
                            $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "PARTICIPANT ERRORS" ? $participant_errs : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_value;
		$normalizing_cost{$c2_value}  {total}   += $norm_value;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_value;
		$normalizing_cost   {ALL}     {total}   += $norm_value;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total} {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total} {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_value;
		$normalizing_cost  {total}  += $norm_value;
	    }
	}
    }

#update event false alarm statistics
    foreach $tst_id (keys %{$tst_database{events}}) {
	$tst_event = $tst_database{events}{$tst_id};
	$event_type = $tst_event->{TYPE};
	$event_modality = $tst_event->{MODALITY};
	$event_origin = $tst_event->{ORIGIN};
	$xdoc_type = crossdoc_type ($tst_event);
	foreach $doc (keys %{$tst_event->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $tst_occ = $tst_event->{documents}{$doc};
	    next if $tst_occ->{MAP};
	    $src_type = $tst_occ->{SOURCE};
	    $count_type = event_count_type ($tst_occ);
	    $participant_errs = 0;
	    $norm_value = 0;
	    $cost = $event_fa_wgt * $tst_occ->{VALUE};
	    $err_type = "fa";

	    $c1_value = $cond1 eq "EVENT TYPE" ? $event_type :
                        $cond1 eq "EVENT MODALITY" ? $event_modality :
                        $cond1 eq "COUNT TYPE" ? $count_type :
                        $cond1 eq "SOURCE TYPE" ? $src_type :
                        $cond1 eq "PARTICIPANT ERRORS" ? $participant_errs : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "EVENT TYPE" ? $event_type :
                            $cond2 eq "EVENT MODALITY" ? $event_modality :
                            $cond2 eq "COUNT TYPE" ? $count_type :
                            $cond2 eq "SOURCE TYPE" ? $src_type :
                            $cond2 eq "PARTICIPANT ERRORS" ? $participant_errs : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_value;
		$normalizing_cost{$c2_value}  {total}   += $norm_value;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_value;
		$normalizing_cost   {ALL}     {total}   += $norm_value;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total} {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total} {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_value;
		$normalizing_cost  {total}  += $norm_value;
	    }
	}
    }
    return ({%error_count}, {%cumulative_cost}, {%normalizing_cost});
}

#################################

sub conditional_TIMEX2_statistics {

    my ($cond1, $cond2) = @_;

    my ($ref_timex2, $tst_timex2, $ref_occ, $tst_occ, $ref_id, $tst_id, $doc);
    my ($ref_mentions, $tst_mentions, $ref_mention, $tst_mention);
    my ($timex2_type, $timex2_value, $count_type, $timex2_class);
    my ($src_type, $timex2_origin);
    my ($norm_cost, $cost, $err_type);
    my (%error_count, %cumulative_cost, %normalizing_cost);
    my ($c1_value, $c2_value, $value);

#initialize output statistics
    undef %attribute_confusion_statistics;
    undef %mention_detection_statistics;

    my @c1_types = $cond1 eq "SOURCE TYPE" ? keys %source_types :
		   $cond1 eq "TIMEX2 ORIGIN" ? @origins : undef;
    die "\n\nFATAL ERROR in conditional_TIMEX2_statistics:  '$cond1' is not a recognized condition\n\n"
	unless @c1_types;
    if ($cond2) {
        my @c2_types = $cond2 eq "SOURCE TYPE" ? keys %source_types :
		       $cond2 eq "TIMEX2 ORIGIN" ? @origins : undef;
	die "\n\nFATAL ERROR in conditional_TIMEX2_statistics:  '$cond2' is not a recognized condition\n\n"
	    unless @c2_types;
	foreach $c2_value (@c2_types, "ALL") {
	    foreach $c1_value (@c1_types, "total") {
		$normalizing_cost{$c2_value}{$c1_value} = 0;
		foreach $err_type (@error_types) {
		    $error_count{$c2_value}{$c1_value}{$err_type} = 0;
		    $cumulative_cost{$c2_value}{$c1_value}{$err_type} = 0;
		}
	    }
	}
    }
    else {
	foreach $c1_value (@c1_types, "total") {
	    $normalizing_cost{$c1_value} = 0;
	    foreach $err_type (@error_types) {
		$error_count{$c1_value}{$err_type} = 0;
		$cumulative_cost{$c1_value}{$err_type} = 0;
	    }
	}
    }

#accumulate recognition statistics over all timex2 occurrences in all documents
    foreach $ref_id (keys %{$ref_database{timex2s}}) {
	$ref_timex2 = $ref_database{timex2s}{$ref_id};
	$timex2_type = $ref_timex2->{TYPE};
	$timex2_origin = $ref_timex2->{ORIGIN};

#update timex2 attribute recognition statistics
	$tst_timex2 = $ref_timex2->{MAP};
	my $attribute_errors;
	if ($tst_timex2) {
	    foreach my $attribute (keys %timex2_attribute_wgt) {
		my $ref_att = $ref_timex2->{attributes}{$attribute};
		$ref_att = "<undef>" unless defined $ref_att;
		my $tst_att = $tst_timex2->{attributes}{$attribute} if $tst_timex2;
		$tst_att = "<undef>" unless defined $tst_att;
		$attribute_confusion_statistics{$attribute}{$ref_att}{$tst_att}++;
		$attribute_errors++ if $ref_att ne $tst_att;
	    }
	}

#update document-level statistics
	foreach $doc (keys %{$ref_timex2->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $ref_occ = $ref_timex2->{documents}{$doc};
	    $src_type = $ref_occ->{SOURCE};
	    $cost = $norm_cost = $ref_occ->{VALUE};
	    $tst_occ = $ref_occ->{MAP};
	    $cost -= $mapped_timex2_document_value{$doc}{$ref_occ->{ID}}{$tst_occ->{ID}} if $tst_occ;

#update timex2 detection statistics
	    $err_type = $tst_occ ? ($attribute_errors ? "error" : "correct") : "miss";
	    $c1_value = $cond1 eq "TIMEX2 TYPE" ? $timex2_type :
		        $cond1 eq "SOURCE TYPE" ? $src_type :
			$cond1 eq "TIMEX2 ORIGIN" ? $timex2_origin : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "TIMEX2 TYPE" ? $timex2_type :
	                    $cond2 eq "SOURCE TYPE" ? $src_type :
			    $cond2 eq "TIMEX2 ORIGIN" ? $timex2_origin : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
	        $error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		    
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_cost;
		$normalizing_cost{$c2_value}  {total}   += $norm_cost;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_cost;
		$normalizing_cost   {ALL}     {total}   += $norm_cost;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total}  {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_cost;
		$normalizing_cost  {total}   += $norm_cost;
	    }

#update timex2 mention statistics
	    if ($tst_occ) {
		$ref_mentions = $ref_occ->{mentions};
		$tst_mentions = $tst_occ->{mentions};
		foreach $ref_mention (@$ref_mentions) {
		    $err_type = "miss";
		    if ($ref_mention->{MAP}) {
			$err_type = extent_mismatch ($ref_mention->{extent}{locator}, $ref_mention->{MAP}->{extent}{locator}) <= $epsilon ?
			    "correct" : "error";
		    }
		    $mention_detection_statistics{$err_type}++;
		}
		foreach $tst_mention (@$tst_mentions) {
		    next if $tst_mention->{MAP};
		    $mention_detection_statistics{fa}++;
		}
	    }
	}
    }

#update timex2 false alarm statistics
    foreach $tst_id (keys %{$tst_database{timex2s}}) {
	$tst_timex2 = $tst_database{timex2s}{$tst_id};
	$timex2_type = $tst_timex2->{TYPE};
	$timex2_origin = $tst_timex2->{ORIGIN};
	foreach $doc (keys %{$tst_timex2->{documents}}) {
	    next unless exists $eval_docs{$doc};
	    $tst_occ = $tst_timex2->{documents}{$doc};
	    next if $tst_occ->{MAP};
	    $src_type = $tst_occ->{SOURCE};
	    $norm_cost = 0;
	    $cost = -$tst_occ->{FA_VALUE};
	    $err_type = "fa";
	    $c1_value = $cond1 eq "TIMEX2 TYPE" ? $timex2_type :
	                $cond1 eq "SOURCE TYPE" ? $src_type :
			$cond1 eq "TIMEX2 ORIGIN" ? $timex2_origin : undef;
	    if ($cond2) {
		$c2_value = $cond2 eq "TIMEX2 TYPE" ? $timex2_type :
	                    $cond2 eq "SOURCE TYPE" ? $src_type :
			    $cond2 eq "TIMEX2 ORIGIN" ? $timex2_origin : undef;

		$error_count{$c2_value}{$c1_value}{$err_type}++;
		$error_count{$c2_value}  {total}  {$err_type}++;
		$error_count   {ALL}   {$c1_value}{$err_type}++;
		$error_count   {ALL}     {total}  {$err_type}++;
		
		$cumulative_cost{$c2_value}{$c1_value}{$err_type} += $cost;
		$cumulative_cost{$c2_value}  {total}  {$err_type} += $cost;
		$cumulative_cost   {ALL}   {$c1_value}{$err_type} += $cost;
		$cumulative_cost   {ALL}     {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c2_value}{$c1_value} += $norm_cost;
		$normalizing_cost{$c2_value}  {total}   += $norm_cost;
		$normalizing_cost   {ALL}   {$c1_value} += $norm_cost;
		$normalizing_cost   {ALL}     {total}   += $norm_cost;
	    }
	    else {
		$error_count{$c1_value}{$err_type}++;
		$error_count  {total}  {$err_type}++;
		
		$cumulative_cost{$c1_value}{$err_type} += $cost;
		$cumulative_cost  {total}  {$err_type} += $cost;
		
		$normalizing_cost{$c1_value} += $norm_cost;
		$normalizing_cost  {total}   += $norm_cost;
	    }
	}
    }
    return ({%error_count}, {%cumulative_cost}, {%normalizing_cost});
}

#################################

sub print_entity_mapping {

    my ($ref_db, $tst_db, $label) = @_;
    my ($ref_entity, $tst_entity, $entity, $ref_id, $tst_id, $doc, $ref_occ, $tst_occ, $output);

    print "\n======== $label ========\n\n";
    foreach $ref_id (sort keys %{$ref_db->{entities}}) {
	my $print_data = $print_all_data;
	$output = "--------\n";
	$ref_entity = $ref_db->{entities}{$ref_id};
	if ($tst_entity = $ref_entity->{MAP}) {
	    $tst_id = $tst_entity->{ID};
	    my $err_type = $ref_entity->{TYPE} eq $tst_entity->{TYPE} ? undef : "TYPE";
	    $err_type .= defined $err_type ? "/CLASS" : "CLASS" if ($ref_entity->{CLASS} ne $tst_entity->{CLASS});
	    my $link_err = $ref_database{entities}{$ref_id}{ORIGIN} ne $tst_database{entities}{$tst_id}{ORIGIN};
	    $print_data ||= $print_err_data if ($link_err or $err_type);

	    $output .= (($link_err or $err_type) ? ">>> " : "    ");
	    $output .= sprintf "ref entity $ref_id (%3.3s/%3.3s)\n", $ref_entity->{TYPE}, $ref_entity->{CLASS};

	    $output .= (($link_err or $err_type) ? ">>> " : "    ");
	    $output .= sprintf "tst entity $tst_id (%3.3s/%3.3s)", $tst_entity->{TYPE}, $tst_entity->{CLASS};
	    $output .= $err_type ? " -- ENTITY $err_type MISMATCH" : "";
	    $output .= $link_err ? " -- database LINK ERROR\n" : "\n";

	    $output .= sprintf ("      entity score:%9.5f out of%9.5f\n",
				$mapped_entity_value{$ref_id}{$tst_id}, $ref_entity->{VALUE});
	}
	else {
	    $print_data ||= $print_err_data;
	    $output .= sprintf (">>> ref entity $ref_id (%3.3s/%3.3s) -- NO MATCHING TST ENTITY\n",
				$ref_entity->{TYPE}, $ref_entity->{CLASS});
	    $output .= sprintf "      entity score:  0.00000 out of%9.5f\n", $ref_entity->{VALUE};
	}
	foreach $doc (keys %{$ref_entity->{documents}}) {
	    next unless defined $eval_docs{$doc};
	    $ref_occ = $ref_entity->{documents}{$doc};
	    print_entity_mapping_details ($ref_occ, $ref_occ->{MAP}, $doc, $print_data, $output);
	    $output = "";
	}
    }
    foreach $tst_id (sort keys %{$tst_db->{entities}}) {
	$tst_entity = $tst_db->{entities}{$tst_id};
	next if $tst_entity->{MAP};
	my $print_data = ($print_all_data or $print_err_data);
	$output = "--------\n" unless $tst_entity->{MAP};
	$output .= sprintf (">>> tst entity $tst_id (%3.3s/%3.3s) -- NO MATCHING REF ENTITY\n",
			    $tst_entity->{TYPE}, $tst_entity->{CLASS});
	$output .= sprintf "      entity score:%9.5f out of  0.00000\n", $tst_entity->{FA_VALUE};
	foreach $doc (keys %{$tst_entity->{documents}}) {
	    next unless defined $eval_docs{$doc};
	    $tst_occ = $tst_entity->{documents}{$doc};
	    next if $tst_occ->{MAP};
	    print_entity_mapping_details (undef, $tst_occ, $doc, $print_data, $output);
	    $output = "";
	}
    }
}

#################################

sub print_entity_mapping_details {

    my ($ref_entity, $tst_entity, $doc, $print_data, $output) = @_;
    my ($ref_mentions, $tst_mentions, $ref_mention, $tst_mention, $ref_names, $tst_names, $ref_name, $tst_name);
    my ($error_type, $link_err, $text);
    my ($entity, $type, $mention, @mentions, $name, @names);

    $output .= "- in document $doc:\n";
    if ($ref_entity) {
	foreach $mention (@{$ref_entity->{mentions}}) {
	    push @mentions, {DATA=>$mention, TYPE=>"REF"};
	}
	foreach $name (@{$ref_entity->{names}}) {
	    push @names, {DATA=>$name, TYPE=>"REF"};
	}
    }
    if ($tst_entity) {
	foreach $mention (@{$tst_entity->{mentions}}) {
	    push @mentions, {DATA=>$mention, TYPE=>"TST"};
	}
	foreach $name (@{$tst_entity->{names}}) {
	    push @names, {DATA=>$name, TYPE=>"TST"};
	}
    }
    if ($ref_entity and $tst_entity) {
	foreach $mention (sort sort_on_locator @mentions) {
	    $type = $mention->{TYPE};
	    $mention = $mention->{DATA};
	    next if $type eq "TST" and $mention->{MAP};
	    if ($mention->{MAP}) {
		$ref_mention = $mention;
		$tst_mention = $mention->{MAP};
		$error_type = $ref_mention->{TYPE} eq $tst_mention->{TYPE} ? undef : "TYPE";
		$error_type .= defined $error_type ? "/ROLE" : "ROLE" if ($ref_mention->{ROLE} ne $tst_mention->{ROLE});
		$error_type .= defined $error_type ? "/STYLE" : "STYLE" if ($ref_mention->{STYLE} ne $tst_mention->{STYLE});
		if ($error_type) {
		    $print_data ||= $print_err_data;
		    $output .= ">>>   ref mention = \"" . (defined $ref_mention->{head}{text} ? $ref_mention->{head}{text} : "???") . "\"";
		    $output .= sprintf " (%3.3s/%3.3s/%3.3s) -- MENTION $error_type MISMATCH (%3.3s/%3.3s/%3.3s)\n",
			$ref_mention->{TYPE}, $ref_mention->{ROLE}, $ref_mention->{STYLE},
			$tst_mention->{TYPE}, $tst_mention->{ROLE}, $tst_mention->{STYLE};
		}
		else {
		    $output .= "      ref mention = \"" . (defined $ref_mention->{head}{text} ? $ref_mention->{head}{text} : "???") . "\"";
		    $output .= sprintf " (%3.3s/%3.3s/%3.3s)\n", $ref_mention->{TYPE}, $ref_mention->{ROLE}, $ref_mention->{STYLE};
		}
		if (extent_mismatch ($ref_mention->{extent}{locator}, $tst_mention->{extent}{locator}) > 1) {
		    $print_data ||= $print_err_data;
		    $text = defined $ref_mention->{extent}{text} ? $ref_mention->{extent}{text} : "???";
		    $output .= ">>>    ref mention extent = \"$text\"\n";
		    $text = defined $tst_mention->{extent}{text} ? $tst_mention->{extent}{text} : "???";
		    $output .= ">>>    tst mention extent = \"$text\" -- MENTION EXTENT MISMATCH\n";
		}
	    }
	    else {
		$print_data ||= $print_err_data;
		$output .= ">>>   ".(lc$type)." mention = \"" . (defined $mention->{head}{text} ? $mention->{head}{text} : "???") . "\"";
		$output .= sprintf " (%3.3s/%3.3s/%3.3s) -- NO MATCHING %s MENTION\n",
		$mention->{TYPE}, $mention->{ROLE}, $mention->{STYLE}, ($type eq "REF"?"TST":"REF");
	    }
	}
	foreach $name (sort sort_on_locator @names) {
	    $type = $name->{TYPE};
	    $name = $name->{DATA};
	    next if $type eq "TST" and $name->{MAP};
	    if ($name->{MAP}) {
		$ref_name = $name;
		$tst_name = $name->{MAP};
		if (extent_mismatch($ref_name->{locator}, $tst_name->{locator}) <= 1) {
		    $output .= "         ref name = \"" . (defined $ref_name->{text} ? $ref_name->{text} : "???") . "\"\n";
		}
		else {
		    $print_data ||= $print_err_data;
		    $text = defined $ref_name->{text} ? $ref_name->{text} : "???";
		    $output .= ">>>    ref name extent = \"$text\"\n";
		    $text = defined $tst_name->{text} ? $tst_name->{text} : "???";
		    $output .= ">>>    tst name extent = \"$text\" -- NAME EXTENT MISMATCH\n";
		}
	    }
	    else {
		$print_data ||= $print_err_data;
		$output .= ">>>      ".(lc$type)." name = \"" . (defined $name->{text} ? $name->{text} : "???") . "\"";
		$output .= " -- NO MATCHING ".($type eq "REF"?"TST":"REF")." NAME\n";
	    }
	}
    }
    else {
	$print_data ||= $print_err_data;
	foreach $mention (sort sort_on_locator @mentions) {
	    $type = $mention->{TYPE};
	    $mention = $mention->{DATA};
	    $output .= "      ".(lc$type)." mention = \"" . (defined $mention->{head}{text} ? $mention->{head}{text} : "???") . "\"";
	    $output .= sprintf " (%3.3s/%3.3s/%3.3s)\n", $mention->{TYPE}, $mention->{ROLE}, $mention->{STYLE};
	}
	foreach $name (sort sort_on_locator @names) {
	    $type = $name->{TYPE};
	    $name = $name->{DATA};
	    $output .= "      ".(lc$type)." name = \"" . (defined $name->{text} ? $name->{text} : "???") . "\"";
	    $output .= " -- NO MATCHING ".($type eq "REF"?"TST":"REF")." NAME\n";
	}
    }
    print $output if $print_data;
}

#################################

sub print_relation_mapping {

    my ($ref_db, $tst_db, $label) = @_;
    my ($ref_relation, $tst_relation, $ref_id, $tst_id);
    my ($print_data, $error_type, $pre_err, $post_err, $arg, $narg, $entity);
    my ($type_mismatch, $subtype_mismatch, $class_mismatch, $time_mismatch);
    my ($e_ref, $e_tst, $a_ref, $a_tst, $arg_err);

    print "\n======== $label ========\n\n";
    foreach $ref_id (sort keys %{$ref_db->{relations}}) {
	$print_data = $print_all_data;
	$ref_relation = $ref_db->{relations}{$ref_id};
	if ($tst_relation = $ref_relation->{MAP}) {
	    $tst_id = $tst_relation->{ID};
	    undef $error_type;
	    $type_mismatch = $ref_relation->{TYPE} ne $tst_relation->{TYPE};
	    $subtype_mismatch = $ref_relation->{SUBTYPE} ne $tst_relation->{SUBTYPE};
	    $class_mismatch = $ref_relation->{CLASS} ne $tst_relation->{CLASS};
	    $time_mismatch = num_relation_time_mismatches ($ref_relation->{times}, $tst_relation->{times}) > 0;
	    $error_type = "TYPE" if $type_mismatch;
	    $error_type .= defined $error_type ? "/SUBTYPE" : "SUBTYPE" if $subtype_mismatch;
	    $error_type .= defined $error_type ? "/CLASS" : "CLASS" if $class_mismatch;
	    $error_type .= defined $error_type ? "/TIME" : "TIME" if $time_mismatch;
	    $print_data ||= $print_err_data if $error_type;
	    next unless $print_data;

#print mapped relations
	    print "--------\n";
	    printf "      relation score:%9.5f out of%9.5f\n",
	        $mapped_relation_value{$ref_id}{$tst_id}, $ref_relation->{VALUE};
	    $pre_err = $error_type ? ">>>" : "   ";
	    $post_err = $error_type ? " -- $error_type MISMATCH\n" : "\n";
	    
	    print "$pre_err ref relation $ref_id ($ref_relation->{TYPE}/$ref_relation->{SUBTYPE}/$ref_relation->{CLASS})$post_err";
	    foreach $time (@{$ref_relation->{times}}) {
		$pre_err = num_relation_time_matches ([$time], $tst_relation->{times}) ? "   " : ">>>";
		printf "$pre_err   time info:%s\n", relation_time_info($time);
	    }
	    $narg=0;
	    foreach $arg (@{$ref_relation->{arguments}}) {
		$e_ref = $ref_db->{entities}{$arg->{ID}};
		$a_ref = $ref_relation->{arguments}[$narg];
		$arg_err = ((not $a_ref->{MAP}) ? "NO CORRESPONDING TST ARGUMENT" :
			    ((not $e_ref->{MAP}) ? "UNMAPPED ARGUMENT" :
			     (($e_ref->{MAP}{ID} ne $a_ref->{MAP}{ID}) ? "MISMAPPED ARGUMENT" : "")));
		print $arg_err ? ">>>   ":"      ", relation_arg_description (++$narg, $e_ref, $arg_err);
	    }
	    print_relation_mentions ($ref_relation->{documents});
	    
	    $pre_err = $error_type ? ">>>" : "   ";
	    print "$pre_err tst relation $tst_id ($tst_relation->{TYPE}/$tst_relation->{SUBTYPE}/$tst_relation->{CLASS})$post_err";
	    foreach $time (@{$tst_relation->{times}}) {
		$pre_err = num_relation_time_matches ([$time], $ref_relation->{times}) ? "   " : ">>>";
		printf "$pre_err   time info:%s\n", relation_time_info($time);
	    }
	    $narg=0;
	    foreach $arg (@{$tst_relation->{arguments}}) {
		$e_tst = $tst_db->{entities}{$arg->{ID}};
		$a_tst = $tst_relation->{arguments}[$narg];
		$arg_err = ((not $a_tst->{MAP}) ? "NO CORRESPONDING REF ARGUMENT" :
			    ((not $e_tst->{MAP}) ? "UNMAPPED ARGUMENT" :
			     (($e_tst->{MAP}{ID} ne $a_tst->{MAP}{ID}) ? "MISMAPPED ARGUMENT": "")));
		print $arg_err ? ">>>   ":"      ", relation_arg_description (++$narg, $e_tst, $arg_err);
	    }
	    print_relation_mentions ($tst_relation->{documents});
	}

#print unmapped reference relations
	else {
	    $print_data ||= $print_err_data;
	    {
		next unless $print_data;
		print "--------\n";
		printf "      relation score:  0.00000 out of%9.5f\n", $ref_relation->{VALUE};
		$pre_err = ">>>";
		$post_err = " -- NO MATCHING TST RELATION\n";
		print "$pre_err ref relation $ref_id ($ref_relation->{TYPE}/$ref_relation->{SUBTYPE}/$ref_relation->{CLASS})$post_err";
		foreach $time (@{$ref_relation->{times}}) {
		    printf "$      time info:%s\n", relation_time_info($time);
		}
		$narg=0;
		foreach $arg (@{$ref_relation->{arguments}}) {
		    $entity = $ref_db->{entities}{$arg->{ID}};
		    print "      ".relation_arg_description (++$narg, $entity);
		}
		print_relation_mentions ($ref_relation->{documents});
	    }
	}
    }

#print unmapped test relations
    $print_data ||= $print_err_data;
    return unless $print_data;
    foreach $tst_id (sort keys %{$tst_db->{relations}}) {
	$tst_relation = $tst_db->{relations}{$tst_id};
	next if $tst_relation->{MAP};
	print "--------\n";
	printf "      relation score:%9.5f out of  0.00000\n", -$tst_relation->{VALUE};
	$pre_err = ">>>";
	$post_err = " -- NO MATCHING REF RELATION\n";
	print "$pre_err tst relation $tst_id ($tst_relation->{TYPE}/$tst_relation->{SUBTYPE}/$tst_relation->{CLASS})$post_err";
	foreach $time (@{$tst_relation->{times}}) {
	    printf "      time info:%s\n", relation_time_info($time);
	}
	$narg=0;
	foreach $arg (@{$tst_relation->{arguments}}) {
	    $entity = $tst_db->{entities}{$arg->{ID}};
	    print "      ".relation_arg_description (++$narg, $entity);
	}
	print_relation_mentions ($tst_relation->{documents});
    }
}

#################################

sub num_relation_time_mismatches {
    
    my ($t1, $t2) = @_;
    my $num_mismatches=0;
    $num_mismatches += @{$t1} if defined $t1;
    $num_mismatches += @{$t2} if defined $t2;
    $num_mismatches -= 2*num_relation_time_matches($t1, $t2);
    return $num_mismatches;
}

#################################

sub num_relation_time_matches {

    my ($times1, $times2) = @_;

    return 0 unless $times1 and $times2;
    my @times1 = @{$times1};
    my @times2 = @{$times2};
    my $num_matches=0;
    
    foreach my $time1 (@times1) {
	foreach my $time2 (@times2) {
	    $num_matches++ if relation_time_match ($time1, $time2);
#	    my $match = 1;
#	    foreach my $type (@info_type) {
#		next if not $time1->{$type} and not @time2->{$type};
#		$match = 0 if not $time1->{$type} or not $time2->{$type} or $time1->{$type} ne $time2->{$type};
#	    }
#	    $num_matches += $match;
	}
    }
    return $num_matches;
}

#################################

sub relation_time_match {

    my ($t1, $t2) = @_;
    my @info_type = ("TYPE", "VAL", "MOD", "DIR");

    foreach my $type (@info_type) {
	next if not $t1->{$type} and not $t2->{$type};
	return "" if not $t1->{$type} or not $t2->{$type} or $t1->{$type} ne $t2->{$type};
    }
    return 1;
}

#################################

sub relation_time_info {

    my ($time) = @_;
    my $info = "";

    return "" unless $time;
    foreach my $type (@relation_time_attributes, "text") {
	my $value = $time->{$type} ? $time->{$type} : "";
	next unless $value;
	$info .= "," if $info;
	$info .= " $type=\"$value\"";
    }
    return $info;
}

#################################

sub relation_arg_description {

    my ($narg, $entity, $error) = @_;
    my $out = sprintf "arg %d (%3.3s/%3.3s): ID=%s", $narg, $entity->{TYPE}, $entity->{CLASS}, $entity->{ID};

    if (my $name=longest_entity_name($entity)) {
	$out .= sprintf ", name=\"%s\"", limit_string($max_string_length, $name);
    }
    elsif (my $head=longest_entity_mention_head($entity)) {
	$out .= sprintf ", head=\"%s\"", limit_string($max_string_length, $head);
    }

    $out .= $error ? sprintf " -- $error\n" : "\n";
    return $out;
}	

#################################

sub print_relation_mentions {

    my ($docs) = @_;
    my ($doc, @mentions, $mention, $time, @times, $arg, $narg);

    foreach $doc (sort keys %{$docs}) {
	print "      -- in document $doc\n";
	@mentions = @{$docs->{$doc}{mentions}};
	foreach $mention (@mentions) {
	    print "         mention ID=$mention->{ID}, lex_cond=$mention->{LC}";
	    @times = @{$mention->{times}};
	    printf ", time info:%s", relation_time_info($time) if ($time=pop @times);
	    foreach $time (@times) {
		printf " & %s", relation_time_info($time);
	    }
	    print "\n";
	    $narg=0;
	    foreach $arg (@{$mention->{arguments}}) {
		printf "           arg %d: ID=$arg->{MENTIONID}, text=\"%s\"\n", ++$narg,
		    (defined $arg->{extent}{text} ? $arg->{extent}{text} : "???");
	    }
	}
    }
}

#################################

sub compute_entity_values {

    my ($ref_id, $ref_entity, $tst_id, $tst_entity, $value);
    my ($doc, $doc_entity, $ref_occ, $tst_occ, %doc_refs, %doc_tsts);
    my $ref_entities = $ref_database{entities};
    my $tst_entities = $tst_database{entities};

#prepare to deweight FA tst mentions that correspond to ref mentions
    tag_tst_mentions_that_correspond_to_ref_mentions ($ref_entities,
						      $tst_entities)
	if $entity_mention_ref_fa_wgt != 1.0;

#group document-level ref entities by document
    foreach $ref_id (keys %$ref_entities) {
	$ref_entity = $ref_entities->{$ref_id};
	$ref_entity->{VALUE} = 0;
	foreach $doc (keys %{$ref_entity->{documents}}) {
	    $ref_occ = $ref_entity->{documents}{$doc};
	    push @{$doc_refs{$doc}}, $ref_occ;
	}
    }

#group document-level tst entities by document
    foreach $tst_id (keys %$tst_entities) {
	$tst_entity = $tst_entities->{$tst_id};
	$tst_entity->{FA_VALUE} = 0;
	foreach $doc (keys %{$tst_entity->{documents}}) {
	    $tst_occ = $tst_entity->{documents}{$doc};
	    push @{$doc_tsts{$doc}}, $tst_occ;
	}
    }

    undef %mapped_entity_document_value;
    undef %mapped_entity_value;
    foreach $doc (keys %doc_refs) {
	foreach $ref_occ (@{$doc_refs{$doc}}) {

#compute ref entity values
	    $ref_id = $ref_occ->{ID};
	    ($ref_occ->{VALUE}) = entity_document_value($ref_occ);
	    $ref_entities->{$ref_id}{VALUE} += $ref_occ->{VALUE};

#compute ref-tst mapped entity values
	    foreach $tst_occ (@{$doc_tsts{$doc}}) {
		($value) = entity_document_value ($ref_occ, $tst_occ);
		next unless defined $value;
		$tst_id = $tst_occ->{ID};
		$mapped_entity_document_value{$doc}{$ref_id}{$tst_id} = $value;
		$mapped_entity_value{$ref_id}{$tst_id} += $value;
	    }
	}
    }

#compute tst entity values
    foreach $doc (keys %doc_tsts) {
	foreach $tst_occ (@{$doc_tsts{$doc}}) {
	    $tst_id = $tst_occ->{ID};
	    ($tst_occ->{VALUE}) = entity_document_value($tst_occ);
	    $tst_entities->{$tst_id}{VALUE} += $tst_occ->{VALUE};
	    ($tst_occ->{FA_VALUE}) = entity_document_value(undef,$tst_occ);
	    $tst_entities->{$tst_id}{FA_VALUE} += $tst_occ->{FA_VALUE};
	}
    }
}

#################################

sub tag_tst_mentions_that_correspond_to_ref_mentions {

    my ($ref_entities, $tst_entities) = @_;

    my ($doc, %doc_entities);
    my ($ref_id, $ref_entity, $ref_occ, $ref_mention, $ref_locator);
    my ($tst_id, $tst_entity, $tst_occ, $tst_mention, $tst_locator);

    foreach $ref_id (keys %$ref_entities) {
	$ref_entity = $ref_entities->{$ref_id};
	foreach $doc (keys %{$ref_entity->{documents}}) {
	    $ref_occ = $ref_entity->{documents}{$doc};
	    push @{$doc_entities{$doc}{REF}}, $ref_occ;
	}
    }

    foreach $tst_id (keys %$tst_entities) {
	$tst_entity = $tst_entities->{$tst_id};
	foreach $doc (keys %{$tst_entity->{documents}}) {
	    $tst_occ = $tst_entity->{documents}{$doc};
	    push @{$doc_entities{$doc}{TST}}, $tst_occ;
	}
    }

    foreach $doc (keys %doc_entities) {
	foreach $ref_occ (@{$doc_entities{$doc}{REF}}) {
	    foreach $tst_occ (@{$doc_entities{$doc}{TST}}) {
		foreach $ref_mention (@{$ref_occ->{mentions}}) {
		    foreach $tst_mention (@{$tst_occ->{mentions}}) {
			next unless defined $ref_mention->{head} and ($ref_locator=$ref_mention->{head}{locator});
			next unless defined $tst_mention->{head} and ($tst_locator=$tst_mention->{head}{locator});
			next if span_overlap($ref_locator, $tst_locator) < $min_overlap;
			$tst_mention->{is_ref_mention} = 1;
		    }
		}
	    }
	}
    }
}

#################################

sub map_entities {

    my ($db, $ref_id, $tst_id, $ref_entity, $tst_entity);
    my (@ref_entities, @tst_entities, @ref_cohorts, @tst_cohorts);

#collect entities
    foreach $ref_id (keys %{$ref_database{entities}}) {
	$ref_entity = $ref_database{entities}{$ref_id};
	push @ref_entities, $ref_entity;
    }
    foreach $tst_id (keys %{$tst_database{entities}}) {
	$tst_entity = $tst_database{entities}{$tst_id};
	push @tst_entities, $tst_entity;
    }

#group entities into cohort sets and map each set independently
    foreach $ref_entity (@ref_entities) {
	next if exists $ref_entity->{cohort};
	tag_cohorts ($ref_entity, \@ref_entities, \@tst_entities, \%mapped_entity_value, 1);
	@ref_cohorts = collect_cohorts (\@ref_entities);
	@tst_cohorts = collect_cohorts (\@tst_entities);
	map_cohorts (\@ref_cohorts, \@tst_cohorts, \%mapped_entity_value, $entity_fa_wgt);

	foreach $ref_entity (@ref_cohorts) {
	    $tst_entity = $ref_entity->{MAP};
	    next unless $tst_entity;
	    map_document_level_entity_mentions ($ref_entity, $tst_entity);
	}
    }
}

#################################

sub collect_cohorts {

    my ($entities) = @_;
    my ($entity, @cohorts);

    foreach $entity (@$entities) {
	next if $entity->{mapped};
	next unless $entity->{cohort};
	push @cohorts, $entity;
	$entity->{mapped} = 1;
    }
    return @cohorts;
}

#################################

sub tag_cohorts {

    my ($ent, $ent_set, $map_set, $mapped_values, $ent_set_is_ref) = @_;

    $ent->{cohort} = 1;
    foreach my $ent_map (@$map_set) {
	next if exists $ent_map->{cohort};
	tag_cohorts ($ent_map, $map_set, $ent_set, $mapped_values, not $ent_set_is_ref)
	    if $ent_set_is_ref ? defined $mapped_values->{$ent->{ID}}{$ent_map->{ID}} :
		defined $mapped_values->{$ent_map->{ID}}{$ent->{ID}};
    }
}

#################################

sub map_cohorts {

    my ($ref_cohorts, $tst_cohorts, $mapped_values, $fa_wgt) = @_;

    my ($i, $j, $ref_id, $tst_id, %mapping_costs, $fa_value);

#compute mapping costs
    for ($i=0; $i<@$ref_cohorts; $i++) {
	$ref_id = $ref_cohorts->[$i]{ID};
	for ($j=0; $j<@$tst_cohorts; $j++) {
	    $tst_id = $tst_cohorts->[$j]{ID};
	    next unless defined $mapped_values->{$ref_id}{$tst_id};
	    $fa_value = $fa_wgt*$tst_cohorts->[$j]{VALUE};
	    $fa_value *= $entity_mention_ref_fa_wgt if $tst_cohorts->[$j]{is_ref_mention};
	    $mapping_costs{$i}{$j} -= $mapped_values->{$ref_id}{$tst_id} + $fa_value;
	}
    }
	    
    my $map = weighted_bipartite_graph_matching(\%mapping_costs) or die
	"\n\nFATAL ERROR:  Cohort mapping through BGM FAILED\n\n";

    foreach $i (keys %$map) {
	$j = $map->{$i};
	$ref_cohorts->[$i]{MAP} = $tst_cohorts->[$j];
	$tst_cohorts->[$j]{MAP} = $ref_cohorts->[$i];
    }
}

#################################

sub map_document_level_entity_mentions {

    my ($ref_entity, $tst_entity) = @_;
    my ($ref_id, $doc, $ref_occ, $tst_occ);

    foreach $doc (keys %{$ref_entity->{documents}}) {
	$ref_occ = $ref_entity->{documents}{$doc};
	$tst_occ = $tst_entity->{documents}{$doc};
	next unless $tst_occ;
	$ref_occ->{MAP} = $tst_occ;
	$tst_occ->{MAP} = $ref_occ;

#map mentions	    
	(my $value, my $map) = entity_document_value($ref_occ, $tst_occ);
	my $ref_mentions = $ref_occ->{mentions};
	my $tst_mentions = $tst_occ->{mentions};
	foreach my $i (keys %$map) {
	    my $j = $map->{$i};
	    $ref_mentions->[$i]{MAP} = $tst_mentions->[$j];
	    $tst_mentions->[$j]{MAP} = $ref_mentions->[$i];
	}
	
#map names
	my ($ref_names, $tst_names, $ref_name, $tst_name, $overlap, $max_overlap);
	$ref_names = $ref_occ->{names};
	$tst_names = $ref_occ->{MAP}{names};
	foreach $ref_name (@$ref_names) {
	    $max_overlap = $min_overlap;
	    foreach $tst_name (@$tst_names) {
		$overlap = span_overlap($ref_name->{locator}, $tst_name->{locator});
		if ($overlap > $max_overlap) {
		    $max_overlap = $overlap;
		    $ref_name->{MAP} = $tst_name;
		}
	    }
	    $tst_name = $ref_name->{MAP};
	    $tst_name->{MAP} = $ref_name if $tst_name;
	}
    }
}

#################################

sub compute_relation_values {

    my ($ref_id, $ref_relation, $tst_id, $tst_relation, $value);
    my ($doc, $doc_relation, $ref_occ, $tst_occ, %doc_relations, %mapped_value_computed);
    my $ref_relations = $ref_database{relations};
    my $tst_relations = $tst_database{relations};

#group ref relations by document
    foreach $ref_id (keys %$ref_relations) {
	$ref_relation = $ref_relations->{$ref_id};
	foreach $doc (keys %{$ref_relation->{documents}}) {
	    $doc_relations{$doc}{REF}{$ref_id} = $ref_relation;
	    ($ref_relation->{documents}{$doc}{VALUE}) = 
		relation_document_value ($ref_relation, undef, $doc);
	    $ref_relation->{VALUE} += $ref_relation->{documents}{$doc}{VALUE};
	}
    }

#group tst relations by document
    foreach $tst_id (keys %$tst_relations) {
	$tst_relation = $tst_relations->{$tst_id};
	foreach $doc (keys %{$tst_relation->{documents}}) {
	    $doc_relations{$doc}{TST}{$tst_id} = $tst_relation;
	    ($tst_relation->{documents}{$doc}{VALUE}) = 
		relation_document_value (undef, $tst_relation, $doc);
	    $tst_relation->{VALUE} += $tst_relation->{documents}{$doc}{VALUE};
	}
    }

#compute ref-tst mapped relation values
    undef %mapped_relation_value;
    foreach $doc (keys %doc_relations) {
	foreach $ref_id (keys %{$doc_relations{$doc}{REF}}) {
	    foreach $tst_id (keys %{$doc_relations{$doc}{TST}}) {
		next if $mapped_value_computed{$ref_id}{$tst_id};
		($mapped_relation_value{$ref_id}{$tst_id}) =
		    relation_document_value ($ref_relations->{$ref_id},
					     $tst_relations->{$tst_id});
		$mapped_value_computed{$ref_id}{$tst_id} = 1;
	    }
	}
    }
}

#################################

sub map_relations {

    my ($ref_id, $tst_id, $ref_relation, $tst_relation);
    my (@ref_relations, @tst_relations, @ref_cohorts, @tst_cohorts);

#collect relations
    foreach $ref_id (keys %{$ref_database{relations}}) {
	$ref_relation = $ref_database{relations}{$ref_id};
	push @ref_relations, $ref_relation;
    }
    foreach $tst_id (keys %{$tst_database{relations}}) {
	$tst_relation = $tst_database{relations}{$tst_id};
	push @tst_relations, $tst_relation;
    }

#group relations into cohort sets and map each set independently
    foreach $ref_relation (@ref_relations) {
	next if exists $ref_relation->{cohort};
	tag_cohorts ($ref_relation, \@ref_relations, \@tst_relations, \%mapped_relation_value, 1);
	@ref_cohorts = collect_cohorts (\@ref_relations);
	@tst_cohorts = collect_cohorts (\@tst_relations);
	map_cohorts (\@ref_cohorts, \@tst_cohorts, \%mapped_relation_value, $relation_fa_wgt);

	foreach $ref_relation (@ref_cohorts) {
	    $tst_relation = $ref_relation->{MAP};
	    next unless $tst_relation;
	    map_document_level_relation_mentions ($ref_relation, $tst_relation);
	    foreach my $doc (keys %{$ref_relation->{documents}}, keys %{$tst_relation->{documents}}) {
		next if defined $mapped_relation_document_value{$doc}{$ref_relation->{ID}}{$tst_relation->{ID}};
		($mapped_relation_document_value{$doc}{$ref_relation->{ID}}{$tst_relation->{ID}}) =
		    relation_document_value ($ref_relation, $tst_relation, $doc);
	    }
	}
    }
}

#################################

sub map_document_level_relation_mentions {

    my ($ref_relation, $tst_relation) = @_;

    my ($value, $arg_order, $narg, $ref_arg, $tst_arg, $doc, $ref_occ, $tst_occ);

    ($value, $arg_order) = relation_document_value ($ref_relation,
						    $tst_relation);

#map relation arguments
    for ($narg=0; $narg<@{$ref_relation->{arguments}}; $narg++) {
	$ref_arg = $ref_relation->{arguments}[$narg];
	$tst_arg = $tst_relation->{arguments}[$arg_order->[$narg]];
	$ref_arg->{MAP} = $tst_arg;
	$tst_arg->{MAP} = $ref_arg;
    }

#map document level info
    foreach $doc (keys %{$ref_relation->{documents}}) {
	$ref_occ = $ref_relation->{documents}{$doc};
	$tst_occ = $tst_relation->{documents}{$doc};
	next unless $tst_occ;
	$ref_occ->{MAP} = $tst_occ;
	$tst_occ->{MAP} = $ref_occ;
	for ($narg=0; $narg<@{$ref_relation->{arguments}}; $narg++) {
	    $ref_arg = $ref_occ->{arguments}[$narg];
	    $tst_arg = $tst_occ->{arguments}[$arg_order->[$narg]];
	    $ref_arg->{MAP} = $tst_arg;
	    $tst_arg->{MAP} = $ref_arg;
	}
    }
}

#################################

sub entity_value {

#N.B.  The entity value must be undef if a match is not possible

    my ($ref_entity, $tst_entity) = @_;
    my ($doc_value, $total_value, $entity_value, $ref_doc);

#entities must exist
    return undef unless $ref_entity;
    $tst_entity = $ref_entity unless $tst_entity;

#accumulate value over all documents
    foreach $ref_doc (keys %{$ref_entity->{documents}}) {
	($doc_value) = entity_document_value ($ref_entity->{documents}{$ref_doc},
					      $tst_entity->{documents}{$ref_doc});
	$total_value += $doc_value if defined $doc_value;
    }
    return undef unless defined $total_value;
    return $total_value;
}
      
#################################

sub entity_document_value {

    my ($ref_doc_entity, $tst_doc_entity) = @_;

    my $fa_entity = not $ref_doc_entity; #calculate FA score if ref is null
    $ref_doc_entity = $tst_doc_entity unless $ref_doc_entity;
    $tst_doc_entity = $ref_doc_entity unless $tst_doc_entity;
    return undef unless $ref_doc_entity;
    my $ref_mentions = $ref_doc_entity->{mentions};
    my $tst_mentions = $tst_doc_entity->{mentions};

    my ($mention_value, $fa_score, %mapping_costs);
    for (my $i=0; $i<@$ref_mentions; $i++) {
	for (my $j=0; $j<@$tst_mentions; $j++) {
	    $mention_value = entity_mention_score ($ref_mentions->[$i], $tst_mentions->[$j]);
	    next unless defined $mention_value;
	    $fa_score = $entity_mention_fa_wgt * $mention_type_wgt{$tst_mentions->[$j]{TYPE}};
	    $fa_score *= $entity_mention_ref_fa_wgt if $tst_mentions->[$j]{is_ref_mention};
	    $mapping_costs{$j}{$i} -= $mention_value + $fa_score;
	}
    }
    return undef unless %mapping_costs;
    
#find optimum mapping of ref mentions to tst mentions
    my ($map) = weighted_bipartite_graph_matching(\%mapping_costs) or die
	"\n\nFATAL ERROR:  Document level entity mention mapping through BGM FAILED\n\n";

    undef $map if $fa_entity; #calculate FA score if ref is null
    my ($doc_mentions_value, $mentions_map);
    for (my $j=0; $j<@$tst_mentions; $j++) {
	my $i = $map->{$j};
	if (defined $i) {
	    $doc_mentions_value -= $mapping_costs{$j}{$i};
	    $mentions_map->{$i} = $j;
	}
	$fa_score = $entity_mention_fa_wgt * $mention_type_wgt{$tst_mentions->[$j]{TYPE}};
	$fa_score *= $entity_mention_ref_fa_wgt if $tst_mentions->[$j]{is_ref_mention};
	$doc_mentions_value -= $fa_score;
    }
    return undef unless defined $doc_mentions_value;
    my $entity_value = min($entity_type_wgt{$ref_doc_entity->{TYPE}}*$entity_class_wgt{$ref_doc_entity->{CLASS}},
			   $entity_type_wgt{$tst_doc_entity->{TYPE}}*$entity_class_wgt{$tst_doc_entity->{CLASS}});
    $entity_value = max($entity_value,$epsilon);

#reduce value for errors in entity attributes
    $entity_value *= $entity_err_wgt{TYPE} if $ref_doc_entity->{TYPE} ne $tst_doc_entity->{TYPE};
    $entity_value *= $entity_err_wgt{SUBTYPE} if $ref_doc_entity->{SUBTYPE} ne $tst_doc_entity->{SUBTYPE};
    $entity_value *= $entity_err_wgt{CLASS} if $ref_doc_entity->{CLASS} ne $tst_doc_entity->{CLASS};
    return $entity_value*$doc_mentions_value, $mentions_map;
}
      
#################################

sub entity_mention_score {

#N.B.  The mention mapping score must be undef if tst doesn't match ref.

    my ($ref_mention, $tst_mention) = @_;

    my ($ref_locator, $tst_locator, $score);

    if (defined $ref_mention->{head} and ($ref_locator = $ref_mention->{head}{locator}) and
	defined $tst_mention->{head} and ($tst_locator = $tst_mention->{head}{locator})) {
	return undef if span_overlap($ref_locator, $tst_locator) < $min_overlap;
	$score = min($mention_type_wgt{$ref_mention->{TYPE}},$mention_type_wgt{$tst_mention->{TYPE}});
    }
    elsif (defined $ref_mention->{extent} and ($ref_locator = $ref_mention->{extent}{locator}) and
	   defined $tst_mention->{extent} and ($tst_locator = $tst_mention->{extent}{locator})) {
	return undef if span_overlap($ref_locator, $tst_locator) < $min_overlap;
	$score = $epsilon;
    }
    else {
	return undef;
    }

#reduce value for errors in mention attributes
    $score *= $entity_mention_err_wgt{TYPE} if $ref_mention->{TYPE} ne $tst_mention->{TYPE};
    $score *= $entity_mention_err_wgt{ROLE} if $ref_mention->{ROLE} ne $tst_mention->{ROLE};
    $score *= $entity_mention_err_wgt{STYLE} if $ref_mention->{STYLE} ne $tst_mention->{STYLE};
    return $score;
}
      
#################################

sub relation_document_value {

    my ($ref_relation, $tst_relation, $doc) = @_;
    my ($arg, @docs, $arg_order, $arg_score, $alt_score, @ref_args, @tst_args, @alt_args);
    my ($ref_db, $tst_db, $ref, $tst);

    ($tst_db, $tst) = $tst_relation ? (\%tst_database, $tst_relation) : (\%ref_database, $ref_relation);
    ($ref_db, $ref) = $ref_relation ? (\%ref_database, $ref_relation) : (\%tst_database, $tst_relation);

    foreach $arg (@{$ref->{arguments}}) {
	push @ref_args, $arg->{ID};
    }
    foreach $arg (@{$tst->{arguments}}) {
	push @tst_args, $arg->{ID};
    }
    @docs = defined $doc ? ($doc) : keys %{$ref->{documents}};
    $arg_score = relation_arguments_score ($ref_db, \@ref_args, $tst_db, \@tst_args, \@docs);

    if (not $ref_relation or not $tst_relation) {
	return undef unless defined $arg_score;
	$arg_order = [0,1];
    }
    elsif ($relation_symmetry{$ref->{TYPE}}{$ref->{SUBTYPE}}) {
	@alt_args = reverse @tst_args;
	$alt_score = relation_arguments_score ($ref_db, \@ref_args, $tst_db, \@alt_args, \@docs);
	if (defined $arg_score and ((not defined $alt_score) or
				    $arg_score >= $alt_score)) {
	    $arg_order = [0,1];
	}
	elsif (defined $alt_score) {
	    $arg_score = $alt_score;
	    $arg_order = [1,0];
	}
	else {
	    return undef;
	}
    }
    elsif (not defined $arg_score) {
	return undef;
    }
    else {
	$arg_order = [0,1];
    }

    my $relation_value = min($relation_type_wgt{$ref->{TYPE}},
			     $relation_type_wgt{$tst->{TYPE}});

#reduce value for errors in relation attributes
    $relation_value *= $relation_err_wgt{TYPE} if $ref->{TYPE} ne $tst->{TYPE};
    $relation_value *= $relation_err_wgt{SUBTYPE} if $ref->{SUBTYPE} ne $tst->{SUBTYPE};

    return $relation_value*$arg_score, $arg_order;
}

#################################

sub relation_arguments_score {

    my ($ref_db, $ref_args, $tst_db, $tst_args, $docs) = @_;

    my ($ref_id, $tst_id, $ref_entities, $tst_entities);
    
    $ref_entities = $ref_db->{entities};
    $tst_entities = $tst_db->{entities};
    my $self_score = $ref_db eq $tst_db;
    my $score;
    for (my $i=0; $i<@$ref_args; $i++) {
	$ref_id = $ref_args->[$i];
	$tst_id = $tst_args->[$i];
	my $min_value = $relation_argument_threshold * $ref_entities->{$ref_id}{VALUE};
	my $match_ok = $self_score ? 1 :
	    (defined $mapped_entity_value{$ref_id}{$tst_id} ? 
	     $mapped_entity_value{$ref_id}{$tst_id} > (1-$required_precision)*$min_value : undef);
	my $arg_score;
	foreach my $doc (@$docs) {
	    if ($self_score) {
 		$arg_score += $ref_entities->{$ref_id}{documents}{$doc}{VALUE} if
		    defined $ref_entities->{$ref_id}{documents}{$doc};
	    }
	    elsif ($match_ok and
		   defined $mapped_entity_document_value{$doc}{$ref_id}{$tst_id}) {
		$arg_score += $mapped_entity_document_value{$doc}{$ref_id}{$tst_id};
	    }
	}
#	return undef unless $arg_score;
	$score += $arg_score if $arg_score;
    }
    return $score;
}

#################################

sub span_overlap {

#This subroutine returns the minimum mutual overlap between two spans.

    my ($ref_locator, $tst_locator, $minmax) = @_;
    if ($ref_locator->{data_type} eq "text" and $tst_locator->{data_type} eq "text") {
	return text_span_overlap ($ref_locator, $tst_locator, $minmax);
    }
    elsif ($ref_locator->{data_type} eq "audio" and $tst_locator->{data_type} eq "audio") {
	return audio_span_overlap ($ref_locator, $tst_locator, $minmax);
    }
    elsif ($ref_locator->{data_type} eq "image" and $tst_locator->{data_type} eq "image") {
	return image_span_overlap ($ref_locator, $tst_locator, $minmax);
    }
    else {
	die "\n\nFATAL ERROR in span_overlap\n"
	    ."    unknown or nonexistent or incompatible ref/tst locator data types\n\n";
    }
}

#################################

sub text_span_overlap {

#This subroutine returns the minimum mutual overlap between two spans.

    my ($ref_locator, $tst_locator, $minmax) = @_;
    my ($ref_start, $ref_end, $tst_start, $tst_end, $overlap);
    
    $ref_start = $ref_locator->{start};
    $ref_end = $ref_locator->{end};
    $tst_start = $tst_locator->{start};
    $tst_end = $tst_locator->{end};

    $overlap = 1 + (min($ref_end,$tst_end) -
		    max($ref_start,$tst_start));

    $minmax = \&max unless defined $minmax;
    return $overlap <= 0 ? 0 : $overlap / &$minmax ($ref_end-$ref_start+1,
						    $tst_end-$tst_start+1);
}

#################################

sub audio_span_overlap {

#This subroutine returns the minimum mutual overlap between two spans.

    my ($ref_locator, $tst_locator, $minmax) = @_;
    my ($ref_start, $ref_end, $tst_start, $tst_end, $overlap);
    my ($ref_overlap, $tst_overlap);
    my $nominal_frame_period = 0.01;
    
    $ref_start = $ref_locator->{tstart};
    $ref_end = $ref_start + $ref_locator->{tdur};
    $tst_start = $tst_locator->{tstart};
    $tst_end = $tst_start + $tst_locator->{tdur};

    $overlap = $nominal_frame_period + (min($ref_end,$tst_end) -
					max($ref_start,$tst_start));

    $minmax = \&max unless defined $minmax;
    return $overlap <= 0 ? 0 : $overlap / &$minmax ($ref_end-$ref_start+$nominal_frame_period,
						    $tst_end-$tst_start+$nominal_frame_period);
}

#################################

sub image_span_overlap {

#This subroutine returns the minimum mutual overlap between two image spans.

    my ($ref_locator, $tst_locator, $minmax) = @_;
    my $ref_boxes = $ref_locator->{bblist};
    my $tst_boxes = $tst_locator->{bblist};
    my ($ref_box, $tst_box);
    my $nominal_pixel_size = 1; # = 1 cm???!!! no -- current scale is pixels rather than cm

    my $ref_area = 0;
    foreach $ref_box (@$ref_boxes) {
	$ref_area += ($ref_box->{width} + $nominal_pixel_size)
	    * ($ref_box->{height} + $nominal_pixel_size);
    }

    my $tst_area = 0;
    foreach $tst_box (@$tst_boxes) {
	$tst_area += ($tst_box->{width} + $nominal_pixel_size)
	    * ($tst_box->{height} + $nominal_pixel_size);
    }
    
    my $mutual_area = 0;
    foreach $ref_box (@$ref_boxes) {
	foreach $tst_box (@$tst_boxes) {
	    next if ($ref_box->{page} ne $tst_box->{page});
	    my $x1 = max($ref_box->{x_start},
			 $tst_box->{x_start});
	    my $x2 = min($ref_box->{x_start}+$ref_box->{width},
			 $tst_box->{x_start}+$tst_box->{width});
	    next if ($x1 > $x2);
	    my $y1 = max($ref_box->{y_start},
			 $tst_box->{y_start});
	    my $y2 = min($ref_box->{y_start}+$ref_box->{height},
			 $tst_box->{y_start}+$tst_box->{height});
	    next if ($y1 > $y2);
	    $mutual_area += (($x2 - $x1 + $nominal_pixel_size) *
			     ($y2 - $y1 + $nominal_pixel_size));
	}
    }

#return the minimum mutual overlap
    $minmax = \&max unless defined $minmax;
    return $mutual_area / &$minmax ($ref_area,$tst_area);
}

#################################

sub extent_mismatch {

#This subroutine returns the maximum mismatch in the
#extent of two locators

    my ($ref_locator, $tst_locator) = @_;
    if ($ref_locator->{data_type} eq "text" and $ref_locator->{data_type} eq "text") {
	return text_extent_mismatch ($ref_locator, $tst_locator);
    }
    elsif ($ref_locator->{data_type} eq "audio" and $ref_locator->{data_type} eq "audio") {
	return audio_extent_mismatch ($ref_locator, $tst_locator);
    }
    elsif ($ref_locator->{data_type} eq "image" and $ref_locator->{data_type} eq "image") {
	return image_extent_mismatch ($ref_locator, $tst_locator);
    }
    else {
	die "\n\nFATAL ERROR in extent_mismatch\n\n";
    }
}

#################################

sub text_extent_mismatch {

#This subroutine returns the maximum mismatch in the
#character extent of two text streams

    my ($ref_locator, $tst_locator) = @_;
    my $extent_mismatch = 0;

    $extent_mismatch =
	max(abs($ref_locator->{start} - $tst_locator->{start}),
	    abs($ref_locator->{end} - $tst_locator->{end}))/$max_diff_chars;
}

#################################

sub audio_extent_mismatch {

#This subroutine returns the maximum mismatch in the
#time extent of two audio signals

    my ($ref_locator, $tst_locator) = @_;
    my $extent_mismatch = 0;
    my ($ref_start, $ref_end, $tst_start, $tst_end);

    $ref_start = $ref_locator->{tstart};
    $tst_start = $tst_locator->{tstart};
    $ref_end = $ref_start + $ref_locator->{tdur};
    $tst_end = $tst_start + $tst_locator->{tdur};

    $extent_mismatch =
	max(abs($ref_start - $tst_start),
	    abs($ref_end - $tst_end))/$max_diff_time;
}

#################################

sub image_extent_mismatch {

#This subroutine returns the maximum mismatch in the
#spatial extent of two images.

    my ($ref_locator, $tst_locator) = @_;
    my $ref_boxes = $ref_locator->{bblist};
    my $tst_boxes = $tst_locator->{bblist};
    my ($ref_box, $tst_box);
    my ($x_mismatch, $y_mismatch);
    my $huge_mismatch = 9999999;
    my $extent_mismatch = 0;

    foreach $ref_box (@$ref_boxes) {
	$x_mismatch = $y_mismatch = $huge_mismatch;
	foreach $tst_box (@$tst_boxes) {
	    next if ($ref_box->{page} ne $tst_box->{page});
	    $x_mismatch = min($x_mismatch,
			      abs($ref_box->{x_start} - $tst_box->{x_start}),
			      abs($ref_box->{x_start}+$ref_box->{width} -
				  $tst_box->{x_start}+$tst_box->{width}));
	    $y_mismatch = min($y_mismatch,
			      abs($ref_box->{y_start} - $tst_box->{y_start}),
			      abs($ref_box->{y_start}+$ref_box->{height} -
				  $tst_box->{y_start}+$tst_box->{height}));
	}
	$extent_mismatch = max($extent_mismatch, $x_mismatch, $y_mismatch);
    }
    return $extent_mismatch/$max_diff_xy;
}

#################################

sub max {

    my ($max, $x);

    $max = shift @_;
    foreach $x (@_) {
	if ($x > $max) {$max = $x}
    }
    return $max;
}

#################################

sub min {

    my ($min, $x);

    $min = shift @_;
    foreach $x (@_) {
	if ($x < $min) {$min = $x}
    }
    return $min;
}

#################################

sub get_document_data {

    my ($db, $docs, $file) = @_;
    my ($line, $name);
    my ($tag, $data, $doc_data, $span);
    my ($src_file, $source, $doc_id, %src_doc, $ndocs);
    my (@entities, $entity, $db_entity, $attribute);
    my (@relations, $relation, $db_relation, $argument, $arg, @args);
    my (@events, $event, $db_event, $id, $role, $participant);
    my (@timex2s, $timex2, $db_timex2);

#read data from file
    open (FILE, $file) or die "\nUnable to open ACE data file '$input_file'", $usage;
    while ($line=<FILE>){
	$data .= $line;
    }
    close (FILE);

#get source file information
    while (($tag, $doc_data, $data) = extract_sgml_tag_and_span ("source_file", $data)) {
	$data_type = lc extract_sgml_tag_attribute ($name="TYPE", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no source file tag attribute '$name' in '$input_file'\n\n";
	$data_type =~ /^(text|audio|image)$/ or die
	    "\n\nFATAL INPUT ERROR:  unknown document data type '$data_type' (in '$input_file')\n\n";
	$src_file = extract_sgml_tag_attribute ($name="URI", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no source file tag attribute '$name' in '$input_file'\n\n";
	$source = extract_sgml_tag_attribute ($name="SOURCE", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no source file tag attribute '$name' in '$input_file'\n\n";
	$source_type = $source if not defined $source_type;
	$source_types{$source} = 1;
	$source_type = "MIXED" if $source ne $source_type;
	
#get document data for all documents in the source file
	while (($tag, $span, $doc_data) = extract_sgml_tag_and_span ("document", $doc_data)) {
	    $doc_id = extract_sgml_tag_attribute ("DOCID", $tag) or die
		"\n\nFATAL INPUT ERROR:  no document ID found in tag '$tag' in file '$input_file'\n\n";
	    not defined $docs->{$doc_id} or die
		"\n\nFATAL INPUT ERROR:  document ID '$doc_id' in file '$input_file' is not unique\n\n";
	    $input_doc = $doc_id;
	    $docs->{$doc_id}{SOURCE} = $source;
	    $docs->{$doc_id}{FILE} = $src_file;
	    $docs->{$doc_id}{TYPE} = $data_type unless exists $docs->{$doc_id}{TYPE};
	    $data_type eq $docs->{$doc_id}{TYPE} or die
		"\n\nFATAL INPUT ERROR:  all data for a given document must be of the same type\n".
		    "        data of type '$docs->{$doc_id}{TYPE} was previously loaded for document '$doc_id'\n".
			"        but now file '$input_file' contains data of type '$data_type'\n\n";
	    $ndocs++;
	    
#load entity data into database
	    @entities = get_entity_data ($span);
	    foreach $entity (@entities) {
		$db->{entities}{$entity->{ID}} = {} unless defined $db->{entities}{$entity->{ID}};
		$db_entity = $db->{entities}{$entity->{ID}};
		$db_entity->{documents}{$doc_id} = {%$entity};
		$db_entity->{documents}{$doc_id}{SOURCE} = $source;
		$db_entity->{ORIGIN} = "CORPUS" unless defined $db_entity->{ORIGIN};
		foreach $attribute (@entity_attributes) {
		    next unless defined $entity->{$attribute};
		    $db_entity->{$attribute} = $entity->{$attribute} unless defined $db_entity->{$attribute};
		    $entity->{$attribute} eq $db_entity->{$attribute} or die
			"\n\nFATAL INPUT ERROR:  attribute value conflict for attribute '$attribute'"
			    ." for entity '$entity->{ID}' in document '$doc_id'\n"
				."    database value is '$db_entity->{$attribute}'\n"
				    ."    document value is '$entity->{$attribute}'\n\n";
		}
		promote_entity_mention_to_mention_entity ($db, $doc_id, $db_entity);
	    }

#load relation data into database
	    @relations = get_relation_data ($span);
	    foreach $relation (@relations) {
		$db->{relations}{$relation->{ID}} = {} unless defined $db->{relations}{$relation->{ID}};
		$db_relation = $db->{relations}{$relation->{ID}};
		$db_relation->{documents}{$doc_id} = {%$relation};
		$db_relation->{documents}{$doc_id}{SOURCE} = $source;
		$db_relation->{ORIGIN} = "CORPUS" unless defined $db_relation->{ORIGIN};
		foreach $attribute (@relation_attributes) {
		    next unless defined $relation->{$attribute};
		    $db_relation->{$attribute} = $relation->{$attribute} unless defined $db_relation->{$attribute};
		    $relation->{$attribute} eq $db_relation->{$attribute} or die
			"\n\nFATAL INPUT ERROR:  attribute value conflict for attribute '$attribute'"
			    ." for relation '$relation->{ID}' in document '$doc_id'\n"
				."    database value is '$db_relation->{$attribute}'\n"
				    ."    document value is '$relation->{$attribute}'\n\n";
		}
		foreach $time (@{$relation->{times}}) {
		    push @{$db_relation->{times}}, $time unless num_relation_time_matches ([$time], $db_relation->{times});
		}
		@args = defined $db_relation->{arguments} ? @{$db_relation->{arguments}} : ();
		delete $db_relation->{arguments};
		foreach $argument (@{$relation->{arguments}}) {
		    not @args or ($arg = shift @args) ne $argument or die
			"\n\nFATAL INPUT ERROR:  argument conflict for relation '$relation->{ID}' in document '$doc_id'\n"
			    ."  input arg is '$argument->{ID}', but preexisting arg is '$arg'\n\n";
		    push @{$db_relation->{arguments}}, $argument;
		}
		promote_relation_mention_to_mention_relation ($db, $doc_id, $db_relation);
	    }

#load event data into database
	    @events = get_event_data ($span);
	    foreach $event (@events) {
		$db->{events}{$event->{ID}} = {} unless defined $db->{events}{$event->{ID}};
		$db_event = $db->{events}{$event->{ID}};
		$db_event->{documents}{$doc_id} = {%$event};
		$db_event->{documents}{$doc_id}{SOURCE} = $source;
		$db_event->{ORIGIN} = "CORPUS" unless defined $db_event->{ORIGIN};
		foreach $attribute (@event_attributes) {
		    next unless defined $event->{$attribute};
		    $db_event->{$attribute} = $event->{$attribute} unless defined $db_event->{$attribute};
		    $event->{$attribute} eq $db_event->{$attribute} or die
			"\n\nFATAL INPUT ERROR:  attribute value conflict for attribute '$attribute'"
			    ." for event '$event->{ID}' in document '$doc_id'\n"
				."    database value is '$db_event->{$attribute}'\n"
				    ."    document value is '$event->{$attribute}'\n\n";
		}
		foreach $role (keys %{$event->{participants}}) {
		    foreach $id (keys %{$event->{participants}{$role}}) {
			$db_event->{participants}{$role}{$id} = $event->{participants}{$role}{$id};
		    }
		}
		defined $db_event->{participants} or die
		    "\n\nFATAL INPUT ERROR:  event $event->{ID} has no participants\n\n";
	    }

#load timex2 data into database
	    @timex2s = get_timex2_data ($span);
	    foreach $timex2 (@timex2s) {
		$db->{timex2s}{$timex2->{ID}} = {} unless defined $db->{timex2s}{$timex2->{ID}};
		$db_timex2 = $db->{timex2s}{$timex2->{ID}};
		$db_timex2->{ID} = $timex2->{ID};
		$db_timex2->{TYPE} = $timex2->{TYPE};
		$db_timex2->{documents}{$doc_id} = {%$timex2};
		$db_timex2->{documents}{$doc_id}{SOURCE} = $source;
		$db_timex2->{ORIGIN} = "CORPUS" unless defined $db_timex2->{ORIGIN};
		$db_timex2->{attributes} = my $db_attributes = {} unless defined $db_timex2->{attributes};
		my $doc_attributes = $timex2->{attributes};
		foreach $attribute (keys %$doc_attributes) {
		    $db_attributes->{$attribute} = $doc_attributes->{$attribute}
		        unless defined $db_attributes->{$attribute};
		    $doc_attributes->{$attribute} eq $db_attributes->{$attribute} or die
			"\n\nFATAL INPUT ERROR:  attribute value conflict for attribute '$attribute'"
			    ." for timex2 '$timex2->{ID}' in document '$doc_id'\n"
				."    database value is '$db_attributes->{$attribute}'\n"
				    ."    document value is '$doc_attributes->{$attribute}'\n\n";
		}
	    }
	}
    }
    $ndocs or warn
	"\n\nWARNING:  file '$input_file' contains no documents\n\n";
}

#################################

sub promote_entity_mention_to_mention_entity {

    my ($db, $doc, $entity) = @_;
    my ($mention, $name, @mentions, @names, $new_id);

    my $num;
    @mentions = @{$entity->{documents}{$doc}{mentions}};
    @names = @{$entity->{documents}{$doc}{names}};
    foreach $mention (@mentions) {
	$num++;
	$entity_serial_number++;
	$new_id = "$entity->{ID} mention $num SN$entity_serial_number";
	$db->{entities}{$entity->{ID}}{MENTION_ENTITY_IDS}{$mention->{ID}} = $new_id;
	$db->{mention_entities}{$new_id} = {%$entity};
	$db->{mention_entities}{$new_id}{ID} = $new_id;
	$db->{mention_entities}{$new_id}{documents} = {%{$entity->{documents}}};
	$db->{mention_entities}{$new_id}{documents} = {$doc => {%{$entity->{documents}{$doc}}}};
	$db->{mention_entities}{$new_id}{documents}{$doc}{ID} = $new_id;
	$db->{mention_entities}{$new_id}{documents}{$doc}{mentions} = [{%$mention}];
	my $max_overlap = $min_overlap;
	my $new_name;
	foreach $name (@names) {
	    next unless defined $mention->{head};
	    my $overlap = span_overlap ($name->{locator}, $mention->{head}{locator});
	    next if $overlap < $max_overlap;
	    $new_name = $name;
	    $max_overlap = $overlap;
	}
	if ($new_name) {
	    $mention->{TYPE} eq "NAME" or warn
		"\n\nWARNING:  NAME found for un-NAME mention $new_id (name = '$new_name->{text}')\n\n";
	}
	else {
	    $mention->{TYPE} ne "NAME" or $mention->{STYLE} eq "METONYMIC" or warn
		"\n\nWARNING:  no NAME found for NAME mention $new_id\n\n";
	    $new_name = $mention->{head} if
		$mention->{TYPE} eq "NAME" and $mention->{STYLE} eq "METONYMIC";
	}
	$db->{mention_entities}{$new_id}{documents}{$doc}{names} = $new_name ? [{%$new_name}] : undef;
    }
    $num or warn
	"\n\nWARNING:  no mentions found for entity '$entity->{ID}' in promote_entity_mention_to_mention_entity\n\n";
}

#################################

sub promote_relation_mention_to_mention_relation {

    my ($db, $doc, $relation) = @_;
    my @entityids;

    my $num;
    my @mentions = @{$relation->{documents}{$doc}{mentions}};
    foreach my $arg (@{$relation->{arguments}}) {
	$entityids[$arg->{ARGNUM}] = $arg->{ID};
    }
    foreach my $mention (@mentions) {
	$num++;
	$relation_serial_number++;
	my $new_id = "$relation->{ID} mention $num SN$relation_serial_number";
	my $mention_relation = $db->{mention_relations}{$new_id} = {%$relation};
	$mention_relation->{documents} = {$doc => {%{$relation->{documents}{$doc}}}};
	$mention_relation->{documents}{$doc}{ID} = $mention_relation->{ID} = $new_id;
	$mention_relation->{documents}{$doc}{mentions} = [$mention={%$mention}];
	my @args;
	foreach my $arg (@{$mention->{arguments}}) {
	    my $mention_entity_id = $db->{entities}{$entityids[$arg->{ARGNUM}]}{MENTION_ENTITY_IDS}{$arg->{MENTIONID}};
	    defined $mention_entity_id or die
		"\n\nFATAL ERROR:  no mention entity ID for arg mention '$arg->{MENTIONID}' of relation '$relation->{ID}'\n\n";
	    push @args, {ARGNUM => $arg->{ARGNUM}, ID => $mention_entity_id};
	}
	$mention_relation->{documents}{$doc}{arguments} = $mention_relation->{arguments} = [@args];
    }
    $num or warn
	"\n\nWARNING:  no mentions found for relation '$relation->{ID}' in promote_relation_mention_to_mention_relation\n\n";
}

#################################

sub get_event_data { #extract document-level information for all events in the document

    my ($data) = @_;
    my (@events, %event_ids);
    my ($tag, $span, $type, $modality, $class, $attribute, $mention);

    while (($tag, $span, $data) = extract_sgml_tag_and_span ("event", $data)) {
	my %event;
#get event ID
	($input_event) = extract_sgml_tag_attribute ("ID", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no event ID found for an event in file '$input_file'\n".
		"    Event tag is '$tag'\nEvent data is '$span'\n";
	$input_event =~ s/^\s*|\s*$//g; #trim any white space from beginning/end of event ID
	not defined $event_ids{$input_event} or die
	    "\n\nFATAL ERROR:  multiple definitions of event '$input_event'\n".
		"              (every event ID must be unique)\n\n";
	$event_ids{$input_event} = 1;
	$event{ID} = $input_event;
#get event TYPE
	($type) = extract_sgml_tag_attribute ("TYPE", $tag) or die
	    "\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc'".
		" in file '$input_file'\n    no event TYPE found.  Event tag is '$tag'\n\n";
	$event{TYPE} = $normalize_event_type{$type} or die
	    "\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc'".
		" in file '$input_file'\n    unrecognized event type ($type).  Event tag is '$tag'\n\n";
#get event MODALITY
	($modality) = extract_sgml_tag_attribute ("MODALITY", $tag) or die
	    "\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc'".
		" in file '$input_file'\n    no event MODALITY found.  Event tag is '$tag'\n\n";
	$event{MODALITY} = $normalize_event_modality{$modality} or die
	    "\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc'".
		" in file '$input_file'\n    unrecognized event sub-type ($modality).  Event tag is '$tag'\n\n";

	$event{participants} = get_event_participants ($span);
	$event{mentions} = [get_event_mentions ($span)];

	promote_event_mention_participants (\%event);
	push @events, {%event};
    }
    return @events;
}

#################################

sub get_event_participants {

    my ($event_data) = @_;
    my ($tag, $span, $participant, %participants);

    my $remaining_data = $event_data;
    while ($participant = get_event_participant ($remaining_data)) {
	not defined $participants{$participant->{ROLE}}{$participant->{ID}} or die
	    "\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
		"    Multiple entries for participant '$participant->{ID} in ROLE $participant->{ROLE}'.\n".
		    "    Event data is '$event_data'\n\n";
	$participants{$participant->{ROLE}}{$participant->{ID}} = $participant;
	($tag, $span, $remaining_data) = extract_sgml_tag_and_span ("event_participant", $remaining_data);
    }
    %participants or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    There must be at least one participant in the event, but there are none.\n".
		"    Event data is '$event_data'\n\n";
    return {%participants};
}

#################################

sub get_event_participant {

    my ($data) = @_;
    my ($tag, $span, %participant, $role);

    return undef unless ($tag, $span) = extract_sgml_tag_and_span ("event_participant", $data);

    $participant{ID} = extract_sgml_tag_attribute ("ENTITYID", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    No participant ENTITYID attribute.  Participant data is '$data'\n\n";
    $role = extract_sgml_tag_attribute ("ROLE", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    No ROLE attribute.  Participant data is '$data'\n\n";
    $participant{ROLE} = $normalize_event_participant_role{$role} or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    Unrecognized ROLE attribute ($role).  Participant data is '$data'\n\n";
    return {%participant};
}

#################################

sub get_event_mentions {

    my ($data) = @_;
    my ($tag, $span, @mentions, $mention);
    
    while ($mention = get_event_mention ($data)) {
	push @mentions, $mention;
	($tag, $span, $data) = extract_sgml_tag_and_span ("event_mention", $data);
    }
    return @mentions;
}

#################################

sub get_event_mention {

    my ($data) = @_;
    my ($tag, $span, %mention, $type);
    
    return undef unless ($tag, $span) = extract_sgml_tag_and_span ("event_mention", $data);

    $mention{ID} = extract_sgml_tag_attribute ("ID", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    ID attribute is missing.  Event data is '$data'\n\n";
    $type = extract_sgml_tag_attribute ("TYPE", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    TYPE attribute is missing.  Event data is '$data'\n\n";
    $mention{TYPE} = $normalize_event_mention_type{$type} or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    Unrecognized TYPE attribute ($type).  Participant data is '$data'\n\n";
    $mention{extent} = get_locator ("extent", $data);
    $mention{anchor} = get_locator ("anchor", $data);
    $mention{participants} = get_event_mention_participants ($span);
    return {%mention};
}

#################################

sub get_event_mention_participants {

    my ($mention_data) = @_;
    my ($tag, $span, $participant, %participants);

    my $remaining_data = $mention_data;
    while ($participant = get_event_mention_participant ($remaining_data)) {
	not defined $participants{$participant->{ROLE}}{$participant->{ID}} or die
	    "\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
		"    Multiple entries for participant '$participant->{ID}'.  Mention data is '$mention_data'\n\n";
	$participants{$participant->{ROLE}}{$participant->{ID}} = $participant;
	($tag, $span, $remaining_data) = extract_sgml_tag_and_span ("event_mention_participant", $remaining_data);
    }
    return {%participants};
}

#################################

sub get_event_mention_participant {

    my ($data) = @_;
    my ($tag, $span, %participant, $role, $source);

    return undef unless ($tag, $span) = extract_sgml_tag_and_span ("event_mention_participant", $data);

    $participant{ID} = extract_sgml_tag_attribute ("ENTITYID", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    No participant ENTITYID attribute.  Participant data is '$data'\n\n";
    $participant{MENTIONID} = extract_sgml_tag_attribute ("ENTITYMENTIONID", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    No participant ENTITYMENTIONID attribute.  Participant data is '$data'\n\n";
    $role = extract_sgml_tag_attribute ("ROLE", $tag) or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    No ROLE attribute.  Participant data is '$data'\n\n";
    $participant{ROLE} = $normalize_event_participant_role{$role} or die
	"\n\nFATAL INPUT ERROR for event '$input_event' in document '$input_doc' in file '$input_file'\n".
	    "    Unrecognized ROLE attribute ($role).  Participant data is '$data'\n\n";
    $participant{extent} = get_locator ("extent", $span);
    return {%participant};
}

#################################

sub promote_event_mention_participants {

    my ($event) = @_;
    my ($mention, $role, $id);

    foreach $mention (@{$event->{mentions}}) {
	foreach $role (keys %{$mention->{participants}}) {
	    foreach $id (keys %{$mention->{participants}{$role}}) {
		$event->{participants}{$role}{$id} = $mention->{participants}{$role}{$id};
	    }
	}
    }
}

#################################

sub print_event_data {

    my ($type, $db) = @_;
    my ($event_id, $event, $role, $id);
    my ($doc_id, $doc, $doc_info);
    my ($attribute, $mention, $entity);

    print "\n======== $type events  ========\n\n";
    foreach $event_id (sort keys %{$db->{events}}) {
	$event = $db->{events}{$event_id};
	print "event ID=$event->{ID}";
	foreach $attribute (@event_attributes) {
	    next if $attribute eq "ID";
	    print ", $attribute=$event->{$attribute}" if defined $event->{$attribute};
	}
	print "\n";
	my @participant_info;
	foreach $role (sort keys %{$event->{participants}}) {
	    foreach $id (sort keys %{$event->{participants}{$role}}) {
		printf "  %s\n", event_participant_description ($db, $event->{participants}{$role}{$id});
	    }
	}
	print_event_mentions ($db, $event_id);
    }
}

#################################

sub event_participant_description {

    my ($db, $participant, $text) = @_;

    my $id = $participant->{ID};
    my $out = sprintf "%9s participant ID=%s", $participant->{ROLE}, $id;
    my $entity = $db->{entities}{$id};

    if (my $name=longest_entity_name($entity)) {
	$out .= sprintf ", name=\"%s\"", limit_string($max_string_length, $name);
    }
    elsif (my $head=longest_entity_mention_head($entity)) {
	$out .= sprintf ", head=\"%s\"", limit_string($max_string_length, $head);
    }

    $out .= limit_string ($max_string_length, $text) if $text;
    return $out;
}	

#################################

sub limit_string {

    my ($max_len, $string) = @_;

    my $len_str = length $string;
    return $string if $len_str <= $max_len;
    return substr ($string, 0, $max_len-3)."...";
}

#################################

sub print_event_mentions {

    my ($db, $event_id, $text) = @_;
    my ($docs, $doc, @mentions, $mention, $role, $id);

    $docs = $db->{events}{$event_id}{documents};
    foreach $doc (sort keys %{$docs}) {
	print "      -- in document $doc\n";
	@mentions = @{$docs->{$doc}{mentions}};
	foreach $mention (@mentions) {
	    printf "         mention ID=$mention->{ID}, TYPE=$mention->{TYPE}, anchor=%s, extent=%s\n",
	        limit_string ($max_string_length, $mention->{anchor}{text}),
		limit_string ($max_string_length, $mention->{extent}{text});
	    foreach $role (sort keys %{$mention->{participants}}) {
		foreach $id (sort keys %{$mention->{participants}{$role}}) {
		    printf "           %s\n", mention_participant_description ($db, $mention->{participants}{$role}{$id});
		}
	    }
	}
    }
}

#################################

sub mention_participant_description {

    my ($db, $participant, $text) = @_;

    my $out = sprintf "%9s participant mention ID=%s (%s), extent=%s",
        $participant->{ROLE}, $participant->{MENTIONID}, $participant->{ID},
        $participant->{extent}{text} ? limit_string ($max_string_length, $participant->{extent}{text}) : "???";
    $out .= sprintf ", text=\"$text\"" if $text;
    return $out;
}	

#################################

sub compute_event_values {

    my ($ref_id, $ref_event, $tst_id, $tst_event, $value);
    my ($doc, $doc_event, $ref_occ, $tst_occ, %doc_events, %mapped_value_computed);
    my $ref_events = $ref_database{events};
    my $tst_events = $tst_database{events};

#group ref events by document
    foreach $ref_id (keys %$ref_events) {
	$ref_event = $ref_events->{$ref_id};
	foreach $doc (keys %{$ref_event->{documents}}) {
	    $doc_events{$doc}{REF}{$ref_id} = $ref_event;
	    $ref_event->{documents}{$doc}{VALUE} = 
		mapped_event_document_value ($doc, \%ref_database, $ref_event);
	    $ref_event->{VALUE} += $ref_event->{documents}{$doc}{VALUE};
	}
    }

#group tst events by document
    foreach $tst_id (keys %$tst_events) {
	$tst_event = $tst_events->{$tst_id};
	foreach $doc (keys %{$tst_event->{documents}}) {
	    $doc_events{$doc}{TST}{$tst_id} = $tst_event;
	    $tst_event->{documents}{$doc}{VALUE} = 
		mapped_event_document_value ($doc, \%tst_database, $tst_event);
	    $tst_event->{VALUE} += $tst_event->{documents}{$doc}{VALUE};
	}
    }

#compute ref-tst mapped event values
    undef %mapped_event_value;
    foreach $doc (keys %doc_events) {
	foreach $ref_id (keys %{$doc_events{$doc}{REF}}) {
	    foreach $tst_id (keys %{$doc_events{$doc}{TST}}) {
		next if $mapped_value_computed{$ref_id}{$tst_id};
		($mapped_event_value{$ref_id}{$tst_id}) = event_value
		    (\%ref_database, $ref_events->{$ref_id},
		     \%tst_database, $tst_events->{$tst_id});
		$mapped_value_computed{$ref_id}{$tst_id} = 1;
	    }
	}
    }
}

#################################

sub map_events {

    my ($ref_id, $tst_id, $ref_event, $tst_event);
    my (@ref_events, @tst_events, @ref_cohorts, @tst_cohorts);

#collect events
    foreach $ref_id (keys %{$ref_database{events}}) {
	$ref_event = $ref_database{events}{$ref_id};
	push @ref_events, $ref_event;
    }
    foreach $tst_id (keys %{$tst_database{events}}) {
	$tst_event = $tst_database{events}{$tst_id};
	push @tst_events, $tst_event;
    }

#group events into cohort sets and map each set independently
    foreach $ref_event (@ref_events) {
	next if exists $ref_event->{cohort};
	tag_cohorts ($ref_event, \@ref_events, \@tst_events, \%mapped_event_value, 1);
	@ref_cohorts = collect_cohorts (\@ref_events);
	@tst_cohorts = collect_cohorts (\@tst_events);
	map_cohorts (\@ref_cohorts, \@tst_cohorts, \%mapped_event_value, $event_fa_wgt);

	foreach $ref_event (@ref_cohorts) {
	    $tst_event = $ref_event->{MAP};
	    next unless $tst_event;
	    map_document_level_event_mentions ($ref_event, $tst_event);
	    foreach my $doc (keys %{$ref_event->{documents}}, keys %{$tst_event->{documents}}) {
		next if defined $mapped_event_document_value{$doc}{$ref_event->{ID}}{$tst_event->{ID}};
		$mapped_event_document_value{$doc}{$ref_event->{ID}}{$tst_event->{ID}} =
		    mapped_event_document_value ($doc, \%ref_database, $ref_event, \%tst_database, $tst_event);
	    }
	}
    }
}

#################################

sub map_document_level_event_mentions {

    my ($ref_event, $tst_event) = @_;

    my ($score, $mapping, $map, $p_ref, $p_tst, $doc, $ref_occ, $tst_occ);

    ($score, $mapping) = event_participants_score (\%ref_database, $ref_event,
						   \%tst_database, $tst_event);

#map event participants
    foreach $map (@$mapping) {
	next unless defined $map->{REFID};
	$p_ref = $ref_event->{participants}{$map->{REFROLE}}{$map->{REFID}};
	$p_tst = $tst_event->{participants}{$map->{TSTROLE}}{$map->{TSTID}};
	$p_ref->{MAP} = $p_tst;
	$p_tst->{MAP} = $p_ref;
    }

#map document level info
    foreach $doc (keys %{$ref_event->{documents}}) {
	$ref_occ = $ref_event->{documents}{$doc};
	$tst_occ = $tst_event->{documents}{$doc};
	$ref_occ->{MAP} = $tst_occ;
	$tst_occ->{MAP} = $ref_occ;
	foreach $map (@$mapping) {
	    next unless defined $map->{REFID};
	    $p_ref = $ref_occ->{participants}{$map->{REFROLE}}{$map->{REFID}};
	    $p_tst = $tst_occ->{participants}{$map->{TSTROLE}}{$map->{TSTID}};
	    next unless $p_ref and $p_tst;
	    $p_ref->{MAP} = $p_tst;
	    $p_tst->{MAP} = $p_ref;
	}
    }
}

#################################

sub event_value {

#N.B.  The event mapping score must be undef if a match is not possible

    my ($ref_db, $ref_event, $tst_db, $tst_event) = @_;
    my ($role, $id, $participants_score, @ref_participants, @tst_participants);
    my ($event_value, $map, $ref_id, $tst_id, $ref_occ, $tst_occ);

#events must exist
    my $self_score = not defined $tst_event;
    $tst_db = $ref_db if $self_score;
    $tst_event = $ref_event if $self_score;
    return undef unless $ref_db and $ref_event;

#compute participants score
    ($participants_score, $map) = event_participants_score
	($ref_db, $ref_event, $tst_db, $tst_event);

    foreach my $doc (keys %{$ref_event->{documents}}) {
	my $ref_occ = $ref_event->{documents}{$doc};
	my $tst_occ = $tst_event->{documents}{$doc};
	next unless $tst_occ;
	my $value = mapped_event_document_value
	    ($doc, $ref_db, $ref_event, $tst_db, $tst_event, $map);
	$event_value += $value if $value;
    }

    return $event_value;
}

#################################

sub mapped_event_document_value {

    my ($doc, $ref_db, $ref_event, $tst_db, $tst_event, $map) = @_;
    my ($ref_mention, $tst_mention, $event_value, $participants_score);

    $participants_score = mapped_event_participants_document_score
	($doc, $ref_db, $ref_event, $tst_db, $tst_event, $map);

#give epsilon credit for overlapping anchors
    my $ref_occ = $ref_event->{documents}{$doc};
    $tst_event = $ref_event unless $tst_event;
    my $tst_occ = $tst_event->{documents}{$doc};
    foreach $ref_mention (@{$ref_occ->{mentions}}) {
	foreach $tst_mention (@{$tst_occ->{mentions}}) {
	    next unless span_overlap($ref_mention->{anchor}{locator},
				     $tst_mention->{anchor}{locator}) > $min_overlap;
	    $participants_score += $epsilon;
	}
    }

    return undef unless $participants_score;

    $event_value = min($event_type_wgt{$ref_event->{TYPE}} * $event_modality_wgt{$ref_event->{MODALITY}},
		       $event_type_wgt{$tst_event->{TYPE}} * $event_modality_wgt{$tst_event->{MODALITY}});

#reduce value for errors in event attributes
    $event_value *= $event_err_wgt{TYPE} if $ref_event->{TYPE} ne $tst_event->{TYPE};
    $event_value *= $event_err_wgt{MODALITY} if $ref_event->{MODALITY} ne $tst_event->{MODALITY};
    return $event_value*$participants_score;
}

#################################

sub event_participants_score {

    my ($ref_db, $ref_event, $tst_db, $tst_event) = @_;
    my ($role, $ref_id, $tst_id);
    my (@ref_participants, @tst_participants, $participant);
    my ($doc, $score, $fa_score);

    $tst_db = $ref_db unless defined $tst_db;
    $tst_event = $ref_event unless defined $tst_event;
    return undef unless defined $ref_db and defined $ref_event and
	defined $ref_event->{participants} and defined $tst_event->{participants};

#collect participants
    foreach $role (keys %{$ref_event->{participants}}) {
	foreach $ref_id (keys %{$ref_event->{participants}{$role}}) {
	    $participant = {ID => $ref_id, ROLE => $role};
	    push @ref_participants, $participant;
	}
    }
    return undef unless @ref_participants;
    foreach $role (keys %{$tst_event->{participants}}) {
	foreach $tst_id (keys %{$tst_event->{participants}{$role}}) {
	    $participant = {ID => $tst_id, ROLE => $role};
	    push @tst_participants, $participant;
	}
    }
    return undef unless @tst_participants;

#collect participant mapping scores
    my %mapping_costs;
    my $self_score = $ref_db eq $tst_db;
    for (my $i=0; $i<@ref_participants; $i++) {
	$ref_id = $ref_participants[$i]->{ID};
	my $min_value = $event_participant_threshold * $ref_db->{entities}{$ref_id}{VALUE};
	for (my $j=0; $j<@tst_participants; $j++) {
	    $tst_id = $tst_participants[$j]->{ID};
	    my $match_ok = $self_score ? 1 :
		(defined $mapped_entity_value{$ref_id}{$tst_id} ? 
		 $mapped_entity_value{$ref_id}{$tst_id} > (1-$required_precision)*$min_value : undef);
	    $score = $fa_score = 0;
	    foreach $doc (keys %{$tst_db->{events}{$tst_event->{ID}}{documents}}) {
		if ($self_score) {
		    $score += $ref_db->{entities}{$ref_id}{documents}{$doc}{VALUE} if $i == $j;
		}
		elsif ($match_ok and
		       defined $mapped_entity_document_value{$doc}{$ref_id}{$tst_id}) {
		    $score += $mapped_entity_document_value{$doc}{$ref_id}{$tst_id};
		}
		elsif (defined $tst_db->{entities}{$tst_id}{documents}{$doc} and
		       defined $tst_db->{entities}{$tst_id}{documents}{$doc}{VALUE}) {
		    $fa_score += $tst_db->{entities}{$tst_id}{documents}{$doc}{VALUE};
		}
	    }
	    $score *= $event_participant_role_err_wgt if
		$ref_participants[$i]->{ROLE} ne $tst_participants[$j]->{ROLE};
	    $fa_score *= $event_participant_fa_wgt;
	    $mapping_costs{$j}{$i} -= $score + $fa_score;
	}
    }
    return undef unless %mapping_costs;
    
#find optimum mapping
    my ($map) = weighted_bipartite_graph_matching(\%mapping_costs) or die
	"\n\nFATAL ERROR:  Event participant mapping through BGM FAILED\n\n";

    my $participants_map;
    for (my $j=0; $j<@tst_participants; $j++) {
	my $i = $map->{$j};
	push @$participants_map,
	    {REFROLE => (defined $i ? $ref_participants[$i]->{ROLE} : undef),
	     REFID   => (defined $i ? $ref_participants[$i]->{ID} : undef),
	     TSTROLE => $tst_participants[$j]->{ROLE},
	     TSTID   => $tst_participants[$j]->{ID}};
    }

    my $participants_score = 0;
    foreach $doc (keys %{$ref_db->{events}{$ref_event->{ID}}{documents}}) {
	$participants_score += mapped_event_participants_document_score
	    ($doc, $ref_db, $ref_event, $tst_db, $tst_event, $participants_map);
    }

    return ($participants_score, $participants_map);
}

#################################

sub mapped_event_participants_document_score {

    my ($doc, $ref_db, $ref_event, $tst_db, $tst_event, $map) = @_;

    my ($ref_role, $ref_id, $tst_role, $tst_id, $mapped_participant);
    my $total_score = 0;
    my $self_score = $tst_event ? $ref_event eq $tst_event : 1;

    $map = get_event_participants_map ($ref_event, $self_score) if not $map;

    foreach $mapped_participant (@$map) {
	$ref_role = $mapped_participant->{REFROLE};
	$ref_id = $mapped_participant->{REFID};
	$tst_role = $mapped_participant->{TSTROLE};
	$tst_id = $mapped_participant->{TSTID};
	if (defined $ref_id) {
	    my $score;
	    if ($self_score) {
		$score = $ref_db->{entities}{$ref_id}{documents}{$doc}{VALUE};
	    }
	    elsif (defined $mapped_entity_document_value{$doc}{$ref_id} and
		   defined $mapped_entity_document_value{$doc}{$ref_id}{$tst_id}) {
		$score = $mapped_entity_document_value{$doc}{$ref_id}{$tst_id};
		$score *= $event_participant_role_err_wgt if $ref_role ne $tst_role;
	    }
	    $total_score += $score if $score;
	}
	else {
	    next if not defined $tst_db->{entities}{$tst_id}{documents}{$doc};
	    my $fa_score -= $tst_db->{entities}{$tst_id}{documents}{$doc}{VALUE};
	    $fa_score *= $event_participant_fa_wgt;
	    $total_score += $fa_score;
	}
    }

    return $total_score;
}

#################################

sub get_event_participants_map {

    my ($ref_event, $self_score) = @_;

    my ($tst_event, $ref_role, $ref_id, $tst_role, $tst_id, $map, $ref_map);

    if ($self_score) {
	foreach $ref_role (keys %{$ref_event->{participants}}) {
	    foreach $ref_id (keys %{$ref_event->{participants}{$ref_role}}) {
		push @$map, {REFROLE => $ref_role,
			     REFID   => $ref_id,
			     TSTROLE => $ref_role,
			     TSTID   => $ref_id};
	    }
	}
	return $map;
    }

    $tst_event = $ref_event->{MAP};
    return undef unless $tst_event;
    foreach $tst_role (keys %{$tst_event->{participants}}) {
	foreach $tst_id (keys %{$tst_event->{participants}{$tst_role}}) {
	    $ref_map = $tst_event->{participants}{$tst_role}{$tst_id}{MAP};
	    $ref_role = defined $ref_map ? $ref_map->{ROLE} : undef;
	    $ref_id = defined $ref_map ? $ref_map->{ID} : undef;
	    push @$map, {REFROLE => $ref_role,
			 REFID   => $ref_id,
			 TSTROLE => $tst_role,
			 TSTID   => $tst_id};
	}
    }
    return $map;
}

#################################

sub num_event_participants {

    my ($event) = @_;
    my ($role, $id);

    return 0 unless defined $event and defined $event->{participants};

#count participants
    my $n = 0;
    foreach $role (keys %{$event->{participants}}) {
	foreach $id (keys %{$event->{participants}{$role}}) {
	    $n++;
	}
    }

    return $n;
}

#################################

sub print_event_mapping {

    my ($ref_db, $tst_db, $label) = @_;
    my ($ref_event, $tst_event, $ref_id, $tst_id, $role, $id);
    my ($print_data, $error_type, $pre_err, $post_err, $p_ref, $e_ref, $p_tst, $e_tst, $part_err);
    my ($type_mismatch, $modality_mismatch);

    print "\n======== $label ========\n\n";
    foreach $ref_id (sort keys %{$ref_db->{events}}) {
	$print_data = $print_all_data;
	$ref_event = $ref_db->{events}{$ref_id};
	if ($tst_event = $ref_event->{MAP}) {
	    $tst_id = $tst_event->{ID};
	    undef $error_type;
	    $type_mismatch = $ref_event->{TYPE} ne $tst_event->{TYPE};
	    $modality_mismatch = $ref_event->{MODALITY} ne $tst_event->{MODALITY};
	    $error_type = "TYPE" if $type_mismatch;
	    $error_type .= defined $error_type ? "/MODALITY" : "MODALITY" if $modality_mismatch;
	    $print_data ||= $print_err_data if $error_type;
	    next unless $print_data;

#print mapped events
	    print "--------\n";
	    printf "      event score:%9.5f out of%9.5f\n",
	        $mapped_event_value{$ref_id}{$tst_id}, $ref_event->{VALUE};
	    $pre_err = $error_type ? ">>>" : "   ";
	    $post_err = $error_type ? " -- $error_type MISMATCH" : "";
	    
	    print "$pre_err ref event $ref_id ($ref_event->{TYPE}/$ref_event->{MODALITY})$post_err\n";
	    foreach $role (sort keys %{$ref_event->{participants}}) {
		foreach $id (sort keys %{$ref_event->{participants}{$role}}) {
		    $e_ref = $ref_db->{entities}{$id};
		    $p_ref = $ref_event->{participants}{$role}{$id};
		    $p_tst = $p_ref->{MAP};
		    $part_err = (not $p_tst) ? ", NO CORRESPONDING TST PARTICIPANT" :
			(((not $e_ref->{MAP}) ? ", UNMAPPED PARTICIPANT" :
			  ($p_ref->{MAP}{ID} and $e_ref->{MAP}{ID} eq $p_ref->{MAP}{ID}) ? "" : ", MISMAPPED PARTICIPANT")
			 .(not $p_tst or $p_tst->{ROLE} eq $role ? "" : ", ROLE MISMATCH"));
		    printf "%s\n", ($part_err ? ">>>   ":"      ").event_participant_description ($ref_db, $p_ref, $part_err);
		}
	    }
	    print_event_mentions ($ref_db, $ref_id);
	    
	    print "$pre_err tst event $tst_id ($tst_event->{TYPE}/$tst_event->{MODALITY})$post_err\n";
	    foreach $role (sort keys %{$tst_event->{participants}}) {
		foreach $id (sort keys %{$tst_event->{participants}{$role}}) {
		    $e_tst = $tst_db->{entities}{$id};
		    $p_tst = $tst_event->{participants}{$role}{$id};
		    $p_ref = $p_tst->{MAP};
		    $part_err = (not $p_ref) ? ", NO CORRESPONDING REF PARTICIPANT" :
			(((not $e_tst->{MAP}{ID}) ? ", UNMAPPED PARTICIPANT" :
			  ($p_tst->{MAP}{ID} and $e_tst->{MAP}{ID} eq $p_tst->{MAP}{ID}) ? "" : ", MISMAPPED PARTICIPANT")
			 .(not $p_ref or $p_ref->{ROLE} eq $role ? "" : ", ROLE MISMATCH"));
		    printf "%s\n", ($part_err ? ">>>   ":"      ").event_participant_description ($tst_db, $p_tst, $part_err);
		}
	    }
	    print_event_mentions ($tst_db, $tst_id);
	}

#print unmapped reference events
	else {
	    $print_data ||= $print_err_data;
	    next unless $print_data;
	    print "--------\n";
	    printf "      event score:  0.00000 out of%9.5f\n", $ref_event->{VALUE};
	    $pre_err = ">>>";
	    $post_err = " -- NO MATCHING TST EVENT";
	    print "$pre_err ref event $ref_id ($ref_event->{TYPE}/$ref_event->{MODALITY})$post_err\n";
	    foreach $role (sort keys %{$ref_event->{participants}}) {
		foreach $id (sort keys %{$ref_event->{participants}{$role}}) {
		    printf "      %s\n", event_participant_description ($ref_db, $ref_event->{participants}{$role}{$id});
		}
	    }
	    print_event_mentions ($ref_db, $ref_id);
	}
    }

#print unmapped test events
    $print_data ||= $print_err_data;
    return unless $print_data;
    foreach $tst_id (sort keys %{$tst_db->{events}}) {
	$tst_event = $tst_db->{events}{$tst_id};
	next if $tst_event->{MAP};
	print "--------\n";
	printf "      event score:%9.5f out of  0.00000\n", -$tst_event->{VALUE};
	$pre_err = ">>>";
	$post_err = " -- NO MATCHING REF EVENT";
	print "$pre_err tst event $tst_id ($tst_event->{TYPE}/$tst_event->{MODALITY})$post_err\n";
	foreach $role (sort keys %{$tst_event->{participants}}) {
	    foreach $id (sort keys %{$tst_event->{participants}{$role}}) {
		printf "      %s\n", event_participant_description ($tst_db, $tst_event->{participants}{$role}{$id});
	    }
	}
	print_event_mentions ($tst_db, $tst_id);
    }
}

#################################

sub get_timex2_data { #extract document-level information for all timex2s in the document

    my ($data) = @_;
    my (@timex2s, %timex2_ids);
    my ($tag, $span, $type, $modality, $class, $attribute, $mention);

    while (($tag, $span, $data) = extract_sgml_tag_and_span ("timex", $data)) {
	my %timex2;
#get timex2 attributes and ID
	($timex2{attributes}) = extract_sgml_tag_attributes ($tag);
	defined $timex2{attributes}{ID} or die
	    "\n\nFATAL INPUT ERROR:  no timex2 ID found for a timex2 in file '$input_file'\n".
		"    Timex2 tag is '$tag'\nTimex2 data is '$span'\n";
	defined $timex2{attributes}{TYPE} or die
	    "\n\nFATAL INPUT ERROR:  no timex2 TYPE found for a timex2 in file '$input_file'\n".
		"    Timex2 tag is '$tag'\nTimex2 data is '$span'\n";
	$timex2{attributes}{TYPE} eq "TIMEX2" or die
	    "\n\nFATAL INPUT ERROR:  timex2 TYPE must be TIMEX2 but is '$timex2{attributes}{TYPE}' in file '$input_file'\n".
		"    Timex2 tag is '$tag'\nTimex2 data is '$span'\n";
	$input_timex2 = $timex2{attributes}{ID};
	$input_timex2 =~ s/^\s*|\s*$//g; #trim any white space from beginning/end of timex2 ID
	not defined $timex2_ids{$input_timex2} or die
	    "\n\nFATAL ERROR:  multiple definitions of timex2 '$input_timex2'\n".
		"              (every timex2 ID must be unique)\n\n";
	$timex2_ids{$input_timex2} = 1;
	$timex2{mentions} = [get_timex2_mentions (\%timex2, $span)];
#set default values
	$timex2{attributes}{SET} = "NO" unless defined $timex2{attributes}{SET};
	$timex2{attributes}{NON_SPECIFIC} = "NO" unless defined $timex2{attributes}{NON_SPECIFIC};
#move ID and TYPE out of attribute hash
	$timex2{ID} = $input_timex2;
	delete $timex2{attributes}{ID};
	$timex2{TYPE} = $timex2{attributes}{TYPE};
	delete $timex2{attributes}{TYPE};
	push @timex2s, {%timex2};
    }
    return @timex2s;
}

#################################

sub get_timex2_mentions { #extract mention information for all mentions of a timex2 in a document

    my ($timex2, $data) = @_;
    my (%mention, @mentions);
    my ($tag, $span, $attribute, $head);
    
    while (($tag, $span, $data) = extract_sgml_tag_and_span ("timex_mention", $data)) {
	undef %mention;
#get timex2 mention attributes	
	($mention{attributes}) = extract_sgml_tag_attributes ($tag);
	if (defined $mention{attributes}{ID}) {
	    $mention{ID} = $mention{attributes}{ID};
	    delete $mention{attributes}{ID};
	}
#promote timex2 mention attributes
	foreach $attribute (keys %{$mention{attributes}}) {
	    if (not $timex2->{attributes}{$attribute}) {
		$timex2->{attributes}{$attribute} = $mention{attributes}{$attribute};
	    }
	    elsif ($attribute eq "COMMENT") {
		$timex2->{attributes}{$attribute} .= "\n$mention{attributes}{$attribute}";
	    }
	    else {
		$timex2->{attributes}{$attribute} eq $mention{attributes}{$attribute} or die
		    "\n\nFATAL INPUT ERROR for timex2 '$input_timex2' in document '$input_doc'\n".
			"    inconsistent attribute values for attribute $attribute".
			    " ('$timex2->{attributes}{$attribute}' versus '$mention{attributes}{$attribute}')\n\n";
	    }
	}
#get timex2 mention head
	$mention{head} = get_locator ("head", $span);
#get timex2 mention extent
	$mention{extent} = get_locator ("extent", $span);
	defined $mention{head} or defined $mention{extent} or die
	    "\n\nFATAL INPUT ERROR for timex2 '$input_timex2' in document '$input_doc'\n".
		"    no mention head or extent found in data ($span)\n\n";
	push @mentions, {%mention};
    }
    @mentions > 0 or die
	"\n\nFATAL INPUT ERROR for timex2 '$input_timex2' in document '$input_doc'\n".
	    "    timex2 contains no mentions\n\n";
    return @mentions;
}

#################################

sub compute_timex2_values {

    my ($ref_id, $ref_timex2, $tst_id, $tst_timex2, $value);
    my ($doc, $doc_timex2, $ref_occ, $tst_occ, %doc_refs, %doc_tsts);
    my $ref_timex2s = $ref_database{timex2s};
    my $tst_timex2s = $tst_database{timex2s};

#group ref timex2s by document
    foreach $ref_id (keys %$ref_timex2s) {
	$ref_timex2 = $ref_timex2s->{$ref_id};
	$ref_timex2->{VALUE} = 0;
	foreach $doc (keys %{$ref_timex2->{documents}}) {
	    $ref_occ = $ref_timex2->{documents}{$doc};
	    push @{$doc_refs{$doc}}, $ref_occ;
	}
    }

#group tst timex2s by document
    foreach $tst_id (keys %$tst_timex2s) {
	$tst_timex2 = $tst_timex2s->{$tst_id};
	$tst_timex2->{FA_VALUE} = 0;
	foreach $doc (keys %{$tst_timex2->{documents}}) {
	    $tst_occ = $tst_timex2->{documents}{$doc};
	    push @{$doc_tsts{$doc}}, $tst_occ;
	}
    }

    undef %mapped_timex2_document_value;
    undef %mapped_timex2_value;
    foreach $doc (keys %eval_docs) {
	foreach $ref_occ (@{$doc_refs{$doc}}) {

#compute ref timex2 values
	    $ref_id = $ref_occ->{ID};
	    ($ref_occ->{VALUE}) = timex2_document_value ($ref_occ);
	    $ref_timex2s->{$ref_id}{VALUE} += $ref_occ->{VALUE};

#compute ref-tst mapped timex2 values
	    foreach $tst_occ (@{$doc_tsts{$doc}}) {
		$tst_id = $tst_occ->{ID};
		($value) = timex2_document_value ($ref_occ, $tst_occ);
		next unless $value;
		$mapped_timex2_document_value{$doc}{$ref_id}{$tst_id} = $value;
		$mapped_timex2_value{$ref_id}{$tst_id} += $value;
	    }
	}
    }

#compute tst timex2 values
    foreach $doc (keys %eval_docs) {
	foreach $tst_occ (@{$doc_tsts{$doc}}) {
	    $tst_id = $tst_occ->{ID};
	    ($tst_occ->{VALUE}) = timex2_document_value ($tst_occ);
	    $tst_timex2s->{$tst_id}{VALUE} += $tst_occ->{VALUE};
	    ($tst_occ->{FA_VALUE}) = timex2_document_value (undef, $tst_occ);
	    $tst_timex2s->{$tst_id}{FA_VALUE} += $tst_occ->{FA_VALUE};
	}
    }
}

#################################

sub timex2_value {

    my ($timex2) = @_;

    my $total_value;
    foreach my $doc (keys %{$timex2->{documents}}) {
	my $doc_timex2 = $timex2->{documents}{$doc};
	($doc_timex2->{VALUE}) = timex2_document_value($doc_timex2);
	$total_value += $doc_timex2->{VALUE};
    }
    return $total_value;
}


#################################

sub mapped_timex2_value {

    my ($ref_timex2, $tst_timex2) = @_;

    my ($total_value, $value);
    foreach my $doc (keys %{$ref_timex2->{documents}}) {
	next unless defined $tst_timex2->{documents}{$doc};
	($value) = timex2_document_value($ref_timex2->{documents}{$doc},
					$tst_timex2->{documents}{$doc});
	$total_value += $value if $value;
    }
    return $total_value;
}

#################################

sub timex2_document_value {

    my ($ref_doc_timex2, $tst_doc_timex2) = @_;

    my $fa_timex2 = not $ref_doc_timex2; #calculate FA score if ref is null
    $ref_doc_timex2 = $tst_doc_timex2 unless $ref_doc_timex2;
    $tst_doc_timex2 = $ref_doc_timex2 unless $tst_doc_timex2;
    return undef unless $ref_doc_timex2;
    my $ref_mentions = $ref_doc_timex2->{mentions};
    my $tst_mentions = $tst_doc_timex2->{mentions};

    my ($mention_value, %mapping_costs);
    for (my $i=0; $i<@$ref_mentions; $i++) {
	for (my $j=0; $j<@$tst_mentions; $j++) {
	    $mention_value = timex2_mention_score ($ref_mentions->[$i], $tst_mentions->[$j]);
	    next unless defined $mention_value;
	    $mapping_costs{$j}{$i} -= $mention_value + $timex2_fa_wgt;
	}
    }
    return undef unless %mapping_costs;
    
#find optimum mapping of ref mentions to tst mentions
    my ($map) = weighted_bipartite_graph_matching(\%mapping_costs) or die
	"\n\nFATAL ERROR:  Document level timex2 mention mapping through BGM FAILED\n\n";

    undef $map if $fa_timex2; #calculate FA score if ref is null
    my ($doc_mentions_value, $mentions_map);
    for (my $j=0; $j<@$tst_mentions; $j++) {
	my $i = $map->{$j};
	if (defined $i) {
	    $doc_mentions_value -= $mapping_costs{$j}{$i};
	    $mentions_map->{$i} = $j;
	}
	$doc_mentions_value -= $timex2_fa_wgt;
    }
    return undef unless defined $doc_mentions_value;

#compute value
    my $timex2_value = $timex2_detection_wgt;
    foreach my $attribute (keys %timex2_attribute_wgt) {
	my $ref_att = $ref_doc_timex2->{attributes}{$attribute};
	my $tst_att = $tst_doc_timex2->{attributes}{$attribute};
	$timex2_value += $timex2_attribute_wgt{$attribute} *
	    ($ref_att eq $tst_att ? 1 : $epsilon)
		if (defined $ref_att and defined $tst_att);
    }
    $timex2_value = max($timex2_value,$epsilon);
    return $timex2_value*$doc_mentions_value, $mentions_map;
}
      
#################################

sub timex2_mention_score {

#N.B.  The mention mapping score must be undef if tst doesn't match ref.

    my ($ref_mention, $tst_mention) = @_;
    my ($ref_locator, $tst_locator, $overlap);

    $ref_locator = $ref_mention->{head}{locator};
    $tst_locator = $tst_mention->{head}{locator};
    return (span_overlap($ref_locator, $tst_locator) < $min_overlap ? undef : 1)
	if $ref_locator and $tst_locator;
    
    $ref_locator = $ref_mention->{extent}{locator};
    $tst_locator = $tst_mention->{extent}{locator};
    return (span_overlap($ref_locator, $tst_locator, \&min) < $min_overlap ? undef : 1)
	if $ref_locator and $tst_locator;

    return undef;
}
      
#################################

sub map_timex2s {

    my ($ref_id, $tst_id, $ref_timex2, $tst_timex2);
    my (@ref_timex2s, @tst_timex2s, @ref_cohorts, @tst_cohorts);

#collect timex2s
    foreach $ref_id (keys %{$ref_database{timex2s}}) {
	$ref_timex2 = $ref_database{timex2s}{$ref_id};
	push @ref_timex2s, $ref_timex2;
    }
    foreach $tst_id (keys %{$tst_database{timex2s}}) {
	$tst_timex2 = $tst_database{timex2s}{$tst_id};
	push @tst_timex2s, $tst_timex2;
    }

#group timex2s into cohort sets and map each set independently
    foreach $ref_timex2 (@ref_timex2s) {
	next if exists $ref_timex2->{cohort};
	tag_cohorts ($ref_timex2, \@ref_timex2s, \@tst_timex2s, \%mapped_timex2_value, 1);
	@ref_cohorts = collect_cohorts (\@ref_timex2s);
	@tst_cohorts = collect_cohorts (\@tst_timex2s);
	map_cohorts (\@ref_cohorts, \@tst_cohorts, \%mapped_timex2_value, $timex2_fa_wgt);

	foreach $ref_timex2 (@ref_cohorts) {
	    $tst_timex2 = $ref_timex2->{MAP};
	    next unless $tst_timex2;
	    map_document_level_timex2_mentions ($ref_timex2, $tst_timex2);
	}
    }
}

#################################

sub map_document_level_timex2_mentions {

    my ($ref_timex2, $tst_timex2) = @_;

    foreach my $doc (keys %{$ref_timex2->{documents}}) {
	my $ref_occ = $ref_timex2->{documents}{$doc};
	my $tst_occ = $tst_timex2->{documents}{$doc};
	next unless $tst_occ;
	$ref_occ->{MAP} = $tst_occ;
	$tst_occ->{MAP} = $ref_occ;

#map mentions	    
	(my $value, my $map) = timex2_document_value($ref_occ, $tst_occ);
	my $ref_mentions = $ref_occ->{mentions};
	my $tst_mentions = $tst_occ->{mentions};
	foreach my $i (keys %$map) {
	    my $j = $map->{$i};
	    $ref_mentions->[$i]{MAP} = $tst_mentions->[$j];
	    $tst_mentions->[$j]{MAP} = $ref_mentions->[$i];
	}
    }
}

#################################

sub print_timex2_data {

    my ($type, $db) = @_;
    my ($timex2_id);

    print "\n======== $type timexs ========\n\n";
    foreach $timex2_id (sort keys %{$db->{timex2s}}) {
	print_timex2 ($db->{timex2s}{$timex2_id});
    }
}

#################################

sub print_timex2 {

    my ($timex2) = @_;

    my $total_value;
    printf "timex2 ID=$timex2->{ID}, VALUE=%.5f, TYPE=$timex2->{TYPE}", timex2_value($timex2);
    foreach my $attribute (sort keys %{$timex2->{attributes}}) {
	print ", $attribute=$timex2->{attributes}{$attribute}" if defined $timex2->{attributes}{$attribute};
    }
    print "\n";
    foreach my $doc (sort keys %{$timex2->{documents}}) {
	my $doc_info = $timex2->{documents}{$doc};
	print "    -- in document $doc\n";
	foreach my $mention (sort sort_on_locator @{$doc_info->{mentions}}) {
	    printf "      mention extent=\"%s\"\n", defined $mention->{extent}{text} ? $mention->{extent}{text} : "???";
	}
    }
}

#################################

sub print_timex2_mapping {

    my ($ref_db, $tst_db, $label) = @_;
    my ($ref_timex2, $tst_timex2, $ref_id, $tst_id, $doc, $ref_occ, $tst_occ, $attribute, $output);

    print "\n======== $label ========\n\n";
    foreach $ref_id (sort keys %{$ref_db->{timex2s}}) {
	my $print_data = $print_all_data;
	$output = "--------\n";
	$ref_timex2 = $ref_db->{timex2s}{$ref_id};
	if ($tst_timex2 = $ref_timex2->{MAP}) {
	    $tst_id = $tst_timex2->{ID};
	    my $attribute_error;
	    foreach $attribute (sort keys %timex2_attribute_wgt) {
		next if (not defined $ref_timex2->{attributes}{$attribute} and
			 not defined $tst_timex2->{attributes}{$attribute});
		next if (defined $ref_timex2->{attributes}{$attribute} and
			 defined $tst_timex2->{attributes}{$attribute} and
			 $ref_timex2->{attributes}{$attribute} eq $tst_timex2->{attributes}{$attribute});
		$attribute_error .= "/" if $attribute_error;
		$attribute_error .= $attribute;
	    }
	    $print_data ||= $print_err_data if $attribute_error;
	    $output .= ($attribute_error ? ">>> " : "    ")."ref timex2 $ref_id";
	    foreach $attribute (sort keys %timex2_attribute_wgt) {
		next if not defined $ref_timex2->{attributes}{$attribute};
		$output .= ", $attribute=$ref_timex2->{attributes}{$attribute}";
	    }
	    $output .= "\n";

	    $output .= ($attribute_error ? ">>> " : "    ")."tst timex2 $tst_id";
	    foreach $attribute (sort keys %timex2_attribute_wgt) {
		next if not defined $tst_timex2->{attributes}{$attribute};
		$output .= ", $attribute=$tst_timex2->{attributes}{$attribute}";
	    }
	    $output .= $attribute_error ? " -- ATTRIBUTE ERROR ($attribute_error)\n" : "\n";

	    $output .= sprintf ("      timex2 score:%9.5f out of%9.5f\n",
				$mapped_timex2_value{$ref_id}{$tst_id}, $ref_timex2->{VALUE});
	}
	else {
	    $print_data ||= $print_err_data;
	    $output .= ">>> ref timex2 $ref_id -- NO MATCHING TST TIMEX2\n";
	    $output .= sprintf ("      timex2 score:  0.00000 out of%9.5f\n", $ref_timex2->{VALUE});
	}
	foreach $doc (keys %{$ref_timex2->{documents}}) {
	    next unless defined $eval_docs{$doc};
	    $ref_occ = $ref_timex2->{documents}{$doc};
	    print_timex2_mapping_details ($ref_occ, $ref_occ->{MAP}, $doc, $print_data, $output);
	    $output = "";
	}
    }
    foreach $tst_id (sort keys %{$tst_db->{timex2s}}) {
	$tst_timex2 = $tst_db->{timex2s}{$tst_id};
	next if $tst_timex2->{MAP};
	my $print_data = ($print_all_data or $print_err_data);
	$output = "--------\n";
	$output .= ">>> tst timex2 $tst_id";
	foreach $attribute (sort keys %timex2_attribute_wgt) {
	    next if not defined $tst_timex2->{attributes}{$attribute};
	    $output .= ", $attribute=$tst_timex2->{attributes}{$attribute}";
	}
	$output .= " -- NO MATCHING REF TIMEX2\n";
	$output .= sprintf ("      timex2 score:%9.5f out of  0.00000\n", $tst_timex2->{FA_VALUE});
	foreach $doc (keys %{$tst_timex2->{documents}}) {
	    next unless defined $eval_docs{$doc};
	    $tst_occ = $tst_timex2->{documents}{$doc};
	    print_timex2_mapping_details (undef, $tst_occ, $doc, $print_all_data, $output);
	    $output = "";
	}
    }
}

#################################

sub print_timex2_mapping_details {

    my ($ref_timex2, $tst_timex2, $doc, $print_data, $output) = @_;
    my ($type, $ref_mention, $tst_mention, $mention, @mentions);

    $output .= "- in document $doc:\n";
    if ($ref_timex2) {
	foreach $mention (@{$ref_timex2->{mentions}}) {
	    push @mentions, {DATA=>$mention, TYPE=>"REF"};
	}
    }
    if ($tst_timex2) {
	foreach $mention (@{$tst_timex2->{mentions}}) {
	    push @mentions, {DATA=>$mention, TYPE=>"TST"};
	}
    }
    if ($ref_timex2 and $tst_timex2) {
	foreach $mention (sort sort_on_locator @mentions) {
	    $type = $mention->{TYPE};
	    $mention = $mention->{DATA};
	    next if $type eq "TST" and $mention->{MAP};
	    if ($mention->{MAP}) {
		$ref_mention = $mention;
		$tst_mention = $mention->{MAP};
		my $extent_error = 
		    extent_mismatch ($ref_mention->{extent}{locator}, $tst_mention->{extent}{locator}) > $epsilon;
		$print_data ||= $print_err_data if $extent_error;
		if (not $extent_error and
		    defined $ref_mention->{extent}{text} and
		    defined $tst_mention->{extent}{text} and
		    $ref_mention->{extent}{text} eq $tst_mention->{extent}{text}) {
		    $output .= "          mention = \"" . $ref_mention->{extent}{text} . "\"\n";
		}
		else {
		    $output .= $extent_error ? ">>>   " : "      ";
		    $output .= "ref mention = \"" . (defined $ref_mention->{extent}{text} ? $ref_mention->{extent}{text} : "???") . "\"";
		    $output .= "\n";
		    $output .= $extent_error ? ">>>   " : "      ";
		    $output .= "tst mention = \"" . (defined $tst_mention->{extent}{text} ? $tst_mention->{extent}{text} : "???") . "\"";
		    $output .= $extent_error ? " -- EXTENT ERROR\n" : "\n";
		}
	    }
	    else {
		$print_data ||= $print_err_data;
		$output .= ">>>   ".(lc$type)." mention = \"" . (defined $mention->{extent}{text} ? $mention->{extent}{text} : "???") . "\"";
		$output .= " -- NO MATCHING %s MENTION\n",
	    }
	}
    }
    else {
	$print_data ||= $print_err_data;
	foreach $mention (sort sort_on_locator @mentions) {
	    $type = $mention->{TYPE};
	    $mention = $mention->{DATA};
	    $output .= "      ".(lc$type)." mention = \"" . (defined $mention->{extent}{text} ? $mention->{extent}{text} : "???") . "\"";
	    $output .= "\n";
	}
    }
    print $output if $print_data;
}

#################################

sub get_entity_data { #extract document-level information for all entities in the document

    my ($span) = @_;
    my (@entities, %entity, %entity_ids);
    my ($tag, $data, $type, $class, $nentities);

    while (($tag, $data, $span) = extract_sgml_tag_and_span ("entity", $span)) {
	undef %entity;
#get entity ID
	$input_entity = extract_sgml_tag_attribute ("ID", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no entity ID found for an entity in file '$input_file'\n".
		"    entity tag is '$tag'\n    entity data is '$data'\n\n";
	$input_entity =~ s/^\s*|\s*$//g; #trim any white space from beginning/end of entity ID
	not defined $entity_ids{$input_entity} or die
	    "\n\nFATAL ERROR:  multiple definitions of entity '$input_entity' in document '$input_doc'\n".
		"    (entities may be defined only once per document)\n\n";
	$entity_ids{$input_entity} = 1;
	$entity{ID} = $input_entity;
#get entity TYPE
	$type = extract_sgml_tag_attribute ("TYPE", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no entity TYPE found for entity '$input_entity'\n".
		"    entity tag is '$tag'\n    entity data is '$data'\n\n";
	$entity{TYPE} = normalize_entity_type($type) or die
	    "\n\nFATAL INPUT ERROR:  unrecognized entity TYPE ($type) for entity '$input_entity'\n".
		"    entity tag is '$tag'\n    entity data is '$data'\n\n";
#get entity SUBTYPE
	$entity{SUBTYPE} = extract_sgml_tag_attribute ("SUBTYPE", $tag);
	$entity{SUBTYPE} = "<NONE>" unless defined $entity{SUBTYPE};
	$entity_subtypes{$entity{TYPE}}{$entity{SUBTYPE}} or die	
	    "\n\nFATAL INPUT ERROR:  illegal entity SUBTYPE ($entity{SUBTYPE}) for entity type '$entity{TYPE}' for entity '$input_entity'\n".
		"    entity tag is '$tag'\n    entity data is '$data'\n\n";
#get entity CLASS
	$class = extract_sgml_tag_attribute ("CLASS", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no entity CLASS found for entity '$input_entity'\n".
		"    entity tag is '$tag'\n    entity data is '$data'\n\n";
	$entity{CLASS} = normalize_entity_class($class) or die
	    "\n\nFATAL INPUT ERROR:  unrecognized entity class ($class) for entity '$input_entity'\n".
		"    entity tag is '$tag'\n    entity data is '$data'\n\n";
#get other entity data
	$entity{mentions} = [get_entity_mentions (\%entity, $data)];
	$entity{names} = [get_entity_names ($data)];
	push @entities, {%entity};
	$nentities++;
    }
    return @entities;
}

#################################

sub get_entity_mentions { #extract mention information for all mentions of an entity in a document

    my ($entity, $mention_data) = @_;
    my (%mention, @mentions);
    my ($tag, $data, $head);
    my ($type, $mention_role, $ref_style);
    
    while (($tag, $data, $mention_data) = extract_sgml_tag_and_span ("entity_mention", $mention_data)) {
	undef %mention;
#get entity mention ID
	$mention{ID} = extract_sgml_tag_attribute ("ID", $tag);
#get entity mention TYPE
	$type = extract_sgml_tag_attribute ("TYPE", $tag) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'\n".
		"    no mention TYPE found in tag '$tag'\n    mention data is '$data'\n\n";
	$mention{TYPE} = $normalize_mention_type{$type} or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'\n".
		"    unrecognized mention type ($type)\n    mention data is '$data'\n\n";
#get entity mention ROLE
	($mention_role) = extract_sgml_tag_attribute ("ROLE", $tag);
	$mention_role = $entity->{TYPE} unless defined $mention_role;
	$mention{ROLE} = normalize_entity_type($mention_role) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'\n".
		"    unrecognized mention role ($mention_role)\n    mention data is '$data'\n\n";
#get reference style	
	($ref_style) = extract_sgml_tag_attribute ("METONYMY_MENTION", $tag);
	($ref_style) = extract_sgml_tag_attribute ("REFERENCE", $tag) unless defined $ref_style;
	($ref_style) = extract_sgml_tag_attribute ("STYLE", $tag) unless defined $ref_style;
	$ref_style = "LITERAL" unless defined $ref_style;
	$ref_style = "METONYMIC" if $ref_style =~ /^(TRUE|INTENDED)$/;
	$ref_style =~ /^(LITERAL|METONYMIC)$/ or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'".
		"    unrecognized mention reference ($ref_style)\n    mention data is '$data'\n\n";
	$mention{STYLE} = $ref_style;
#get entity mention LDCTYPE and LDCATR
	$mention{LDCTYPE} = extract_sgml_tag_attribute ("LDCTYPE", $tag);
	$mention{LDCATR} = extract_sgml_tag_attribute ("LDCATR", $tag);
#get entity mention extent
	$mention{extent} = get_locator ("extent", $data) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'\n".
		"    no mention extent found in data ($data)\n\n";
#get entity mention head
	$head = get_locator ("head", $data);
	$mention{head} = $head if defined $head;
	push @mentions, {%mention};
    }
    @mentions > 0 or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'\n".
	    "    entity contains no mentions\n\n";
    return @mentions;
}

#################################

sub reduce_entity_to_single_role {

    my ($entity) = @_;
    my ($doc, $doc_info, $mention);

    $entity->{documents} = {%{$entity->{documents}}};
    foreach $doc (keys %{$entity->{documents}}) {
	$doc_info = $entity->{documents}{$doc} = {%{$entity->{documents}{$doc}}};
	$doc_info->{ID} = $entity->{ID};
	$doc_info->{TYPE} = $entity->{TYPE};
	my @same_role_mentions;
	foreach $mention (@{$doc_info->{mentions}}) {
	    push @same_role_mentions, {%$mention} if $mention->{ROLE} eq $entity->{TYPE};
	}
	$doc_info->{mentions} = [@same_role_mentions];
	delete $entity->{documents}{$doc} unless @same_role_mentions > 0;
    }
}

#################################

sub longest_entity_mention_head {

    my ($entity) = @_;
    my (@docs, $doc, @mentions, $mention, $text);
    my $longest_head="";
    if ($entity->{documents}) { # this is a database entity
	@docs = keys %{$entity->{documents}};
	foreach $doc (@docs) {
	    @mentions = @{$entity->{documents}{$doc}{mentions}};
	    foreach $mention (@mentions) {
		$text = $mention->{head}{text} if defined $mention->{head}{text};
		$longest_head = $text if defined $text and length($text) > length($longest_head);
	    }
	}
    }
    else { # this is a document-level entity
	@mentions = @{$entity->{mentions}};
	foreach $mention (@mentions) {
	    $text = $mention->{head}{text} if defined $mention->{head}{text};
	    $longest_head = $text if defined $text and length($text) > length($longest_head);
	}
    }
    return length($longest_head) > 0 ? $longest_head : undef;
}

#################################
    
sub get_entity_names { #extract name information for all names of an entity in a document

    my ($data) = @_;
    my ($tag, $span, $name, @names);
    
    ($data) = extract_sgml_span ("entity_attributes", $data);

    while ($name=get_locator("name",$data)) {
	push @names, $name;
	($tag, $span, $data) = extract_sgml_tag_and_span ("name", $data);
    }
    return @names;
}

#################################

sub longest_entity_name {

    my ($entity) = @_;
    my (@docs, $doc, @names, $name, $text);
    my $longest_name="";
    if ($entity->{documents}) { # this is a database entity
	@docs = keys %{$entity->{documents}};
	foreach $doc (@docs) {
	    next unless defined $entity->{documents}{$doc}{names};
	    @names = @{$entity->{documents}{$doc}{names}};
	    foreach $name (@names) {
		$text = $name->{text};
		$longest_name = $text if defined $text and length($text) > length($longest_name);
	    }
	}
    }
    else { # this is a document-level entity
	return undef unless defined $entity->{names};
	@names = @{$entity->{names}};
	foreach $name (@names) {
	    $text = $name->{text};
	    $longest_name = $text if defined $text and length($text) > length($longest_name);
	}
    }
    return length($longest_name) > 0 ? $longest_name : undef;
}

#################################
    
sub get_locator {

    my ($name, $data) = @_;

    my ($span, %info);

    return undef unless ($span) = extract_sgml_span ($name, $data);

    if ($data_type eq "text") {
	($info{locator}) = get_text_locator ($span);
	$info{text} = $info{locator}{text};
    }
    elsif ($data_type eq "audio") {
	($info{locator}) = get_audio_locator ($span);
	$info{text} = $info{locator}{text};
    }
    elsif ($data_type eq "image") {
	($info{locator}) = [get_image_locator ($span)];
	$info{text} = "";
	foreach my $bbox (@{$info{locator}}) {
	    $info{text} .= $bbox->{locator}{text};
	}
    }
    else {
	die "\n\nFATAL INPUT ERROR for document '$input_doc' in file '$input_file'\n".
	    "    No locator read routine for '$name' locator in '$data' for '$data_type'\n\n";
    }
    $info{text} =~ s/-\n//sg;
    $info{text} =~ s/\n/ /sg;
    $info{text} =~ s/\s+/ /sg;
    $info{locator}{data_type} = $data_type;
    return {%info};
}

#################################
    
sub get_text_locator {

    my ($data) = @_;
    my (%locator, $tag, $span);

    ($tag, $span) = extract_sgml_tag_and_span ("charseq", $data) or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'".
	    "in file '$input_file'\n    text mention contains no position info (no 'charseq' tag): '$data'\n\n";

    ($locator{start}) = extract_sgml_tag_attribute ("START", $tag) or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    No 'START' attribute found in data '$data'\n\n";
    ($locator{end}) = extract_sgml_tag_attribute ("END", $tag) or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    No 'END' attribute found in data '$data'\n\n";
    $locator{end}-$locator{start} >= 0 or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    Negative text span in data '$data'\n\n";
    $locator{start} >= 0 or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    Negative character index in data '$data'\n\n";
    ($locator{text}) = $span;
    return {%locator};
}

#################################
    
sub get_audio_locator {

    my ($data) = @_;
    my (%locator, $tag, $span);

    ($tag, $span) = extract_sgml_tag_and_span ("timespan", $data) or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'".
	    "in file '$input_file'\n    audio mention contains no timing info (no 'times' tag): '$data'\n\n";

    ($locator{tstart}) = extract_sgml_tag_attribute ("START", $tag) or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    No 'START' tag found in data '$data'\n\n";
    ($locator{tdur}) = extract_sgml_tag_attribute ("END", $tag) - $locator{tstart} or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    No 'END' tag found in data '$data'\n\n";
    $locator{tdur} >= 0 or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    Negative time duration in data '$data'\n\n";
    $locator{tstart} >= 0 or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
	    "    Negative start time in data '$data'\n\n";
    ($locator{text}) = $span;
    return {%locator};
}

#################################
    
sub get_image_locator {

    my ($data) = @_;
    my (%box, @boxlist);
    my ($tag, $span);
    
    ($data) = extract_sgml_span ("bblist", $data);

    my $nboxes = 0;
    while (($tag, $span, $data) = extract_sgml_tag_and_span ("pixelboundingbox", $data)) {
	$box{page} = extract_sgml_tag_attribute ("Signal", $tag) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    No 'Signal' attribute found in tag '$tag'\n\n";
	$box{x_start} = extract_sgml_tag_attribute ("x1", $tag) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    No 'x1' tag found in data '$tag'\n\n";
	$box{y_start} = extract_sgml_tag_attribute ("y1", $tag) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    No 'y1' tag found in data '$tag'\n\n";
	$box{width} = extract_sgml_tag_attribute ("x2", $tag) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    No 'x2' tag found in data '$tag'\n\n";
	$box{height} = extract_sgml_tag_attribute ("y2", $tag) or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    No 'y2' tag found in data '$tag'\n\n";

	$box{width} >= 0 or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    Negative bounding box width in data '$tag'\n\n";
	$box{height} >= 0 or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    Negative bounding box height in data '$tag'\n\n";
	$box{x_start} >= 0 or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    Negative 'x1' in data '$tag'\n\n";
	$box{y_start} >= 0 or die
	    "\n\nFATAL INPUT ERROR for entity '$input_entity' in file '$input_file'\n".
		"    Negative 'y1' in data '$tag'\n\n";
	$box{text} = $span;

	push @boxlist, {%box};
	$nboxes++;
    }
    $nboxes > 0 or die
	"\n\nFATAL INPUT ERROR for entity '$input_entity' in document '$input_doc'".
	    "in file '$input_file'\n    image mention contains no boxes (no 'pixelboundingbox' tag): '$data'\n\n";
    return @boxlist;
}

#################################

sub sort_on_locator {

    my $ax = $a;
    my $bx = $b;

    ($ax, $bx) = ($ax->{DATA}, $bx->{DATA}) if defined $ax->{DATA} and defined $bx->{DATA};

    $ax = defined $ax->{head}{locator} ? $ax->{head}{locator} :
	defined $ax->{extent}{locator} ? $ax->{extent}{locator} :
	    defined $ax->{locator} ? $ax->{locator} : die
		"\n\nFATAL ERROR in input to sort_on_locator\n\n";

    $bx = defined $bx->{head}{locator} ? $bx->{head}{locator} :
	defined $bx->{extent}{locator} ? $bx->{extent}{locator} :
	    defined $bx->{locator} ? $bx->{locator} : die
		"\n\nFATAL ERROR in input to sort_on_locator\n\n";

    if ($ax->{data_type} eq "text" and $bx->{data_type} eq "text") {
	return $ax->{start} <=> $bx->{start};
    }
    elsif ($ax->{data_type} eq "audio" and $bx->{data_type} eq "audio") {
	return $ax->{tstart} <=> $bx->{tstart};
    }
    elsif ($ax->{data_type} eq "image" and $bx->{data_type} eq "image") {
	my $ax_box = $ax->{bblist}[0];
	my $bx_box = $bx->{bblist}[0];
	my $cmp = $ax_box->{page} <=> $bx_box->{page};
	return $cmp if $cmp;
	$cmp = $ax_box->{y_start} <=> $bx_box->{y_start};
	return $cmp if $cmp;
	$cmp = $ax_box->{x_start} <=> $bx_box->{x_start};
	return $cmp;
    }
    else {
	die "\n\nFATAL ERROR in sort_on_locator\n\n";
    }
}

#################################

sub get_relation_data { #extract document-level information for all relations in the document

    my ($data) = @_;
    my (@relations, %relation_ids);
    my ($tag, $span, $type, $subtype, $class, $attribute, $mention);

    while (($tag, $span, $data) = extract_sgml_tag_and_span ("relation", $data)) {
	my %relation;
#get relation ID
	($input_relation) = extract_sgml_tag_attribute ("ID", $tag) or die
	    "\n\nFATAL INPUT ERROR:  no relation ID found for a relation in file '$input_file'\n".
		"    Relation tag is '$tag'\n    Relation data is '$span'\n\n";
	$input_relation =~ s/^\s*|\s*$//g; #trim any white space from beginning/end of relation ID
	not defined $relation_ids{$input_relation} or die
	    "\n\nFATAL ERROR:  multiple definitions of relation '$input_relation'\n".
		"    (every relation ID must be unique)\n\n";
	$relation_ids{$input_relation} = 1;
	$relation{ID} = $input_relation;
#get relation TYPE
	($type) = extract_sgml_tag_attribute ("TYPE", $tag) or die
	    "\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc'".
		" in file '$input_file'\n    no relation TYPE found.  Relation tag is '$tag'\n\n";
	$relation{TYPE} = $normalize_relation_type{$type} or die
	    "\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc'".
		" in file '$input_file'\n    unrecognized relation type ($type).  Relation tag is '$tag'\n\n";
#get relation SUBTYPE
	($subtype) = extract_sgml_tag_attribute ("SUBTYPE", $tag) or $subtype = "<NONE>";
	$relation{SUBTYPE} = $subtype;
	$relation_subtypes{$relation{TYPE}}{$relation{SUBTYPE}} or die	
	    "\n\nFATAL INPUT ERROR:  illegal relation SUBTYPE ($relation{SUBTYPE}) for relation type $relation{TYPE}\n".
		"    Relation tag is '$tag'\n    Relation data is '$data'\n\n";

#get relation CLASS
	($class) = extract_sgml_tag_attribute ("CLASS", $tag);
	$class = "EXPLICIT" unless defined $class;
	$relation{CLASS} = $normalize_relation_class{$class} or die
	    "\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc'".
		" in file '$input_file'\n    unrecognized relation class ($class).  Relation tag is '$tag'\n\n";

	$relation{arguments} = [get_relation_arguments ($span)];
	$relation{mentions} = [get_relation_mentions ($span)];
	$relation{times} = [get_times ("relation_time", $span)];

	promote_relation_mention_times (\%relation);
	push @relations, {%relation};
    }
    return @relations;
}

#################################

sub promote_relation_mention_times {

    my ($relation) = @_;
    my ($mention, $time);

    foreach $mention (@{$relation->{mentions}}) {
	foreach $time (@{$mention->{times}}) {
	    next if null_relation_time ($time);
	    my %time = %{$time};
	    delete $time{text};
	    push @{$relation->{times}}, \%time unless num_relation_time_matches ([$time], $relation->{times});
	}
    }
}

#################################

sub null_relation_time {

    my ($time) = @_;
    foreach my $attribute (@relation_time_attributes) {
	return "" if $time->{$attribute};
    }
    return 1;
}

#################################

sub get_relation_arguments {

    my ($data) = @_;
    my ($tag, $span, $argument, @arguments);

    while ($argument = get_relation_argument ($data)) {
	push @arguments, $argument;
	($tag, $span, $data) = extract_sgml_tag_and_span ("rel_entity_arg", $data);
    }
    @arguments = sort {$a->{ARGNUM} <=> $b->{ARGNUM}} @arguments;
    return @arguments;
}

#################################

sub get_relation_argument {

    my ($data) = @_;
    my ($tag, $span, $num, %argument);

    return undef unless ($tag, $span) = extract_sgml_tag_and_span ("rel_entity_arg", $data);

    $argument{ARGNUM} = extract_sgml_tag_attribute ("ARGNUM", $tag) or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    No ARGNUM attribute.  Relation data is '$data'\n\n";
    $argument{ARGNUM} == 1 or $argument{ARGNUM} == 2 or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    ARGNUM must be '1' or '2', but ARGNUM is '$argument{ARGNUM}'.  Relation data is '$data'\n\n";
    $argument{ID} = extract_sgml_tag_attribute ("ENTITYID", $tag) or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    No argument ENTITYID attribute.  Relation data is '$data'\n\n";
    return {%argument};
}

#################################

sub get_times {

    my ($name, $data) = @_;
    my ($tag, $span, $time, @times);
    
    while ($time = get_time ($name, $data)) {
	push @times, $time;
	($tag, $span, $data) = extract_sgml_tag_and_span ($name, $data);
    }
    return @times;
}

#################################

sub get_time {

    my ($name, $data) = @_;
    my ($tag, $span, $attribute, %time);
    
    return undef unless ($tag, $span) = extract_sgml_tag_and_span ($name, $data);
    foreach $attribute (@relation_time_attributes) {
	$time{$attribute} = extract_sgml_tag_attribute ($attribute, $tag);
    }
    ($time{source}) = get_locator ("source", $span);
    return {%time};
}

#################################

sub get_relation_mentions {

    my ($data) = @_;
    my ($tag, $span, @mentions, $mention);
    
    while ($mention = get_relation_mention ($data)) {
	push @mentions, $mention;
	($tag, $span, $data) = extract_sgml_tag_and_span ("relation_mention", $data);
    }
    return @mentions;
}

#################################

sub get_relation_mention {

    my ($data) = @_;
    my ($tag, $span, %mention);
    
    return undef unless ($tag, $span) = extract_sgml_tag_and_span ("relation_mention", $data);

    $mention{ID} = extract_sgml_tag_attribute ("ID", $tag) or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    ID attribute is missing.  Relation data is '$data'\n\n";
    $mention{LC} = extract_sgml_tag_attribute ("LDCLEXICALCONDITION", $tag);
    $mention{LC} = "unknown" unless $mention{LC};
    $mention{extent} = get_locator ("ldc_extent", $data);
    $mention{arguments} = [get_relation_mention_arguments ($span)];
    $mention{times} = [get_times ("rel_mention_time", $span)];
    return {%mention};
}

#################################

sub get_relation_mention_arguments {

    my ($data) = @_;
    my ($tag, $span, $argument, @arguments);

    while ($argument = get_relation_mention_argument ($data)) {
	push @arguments, $argument;
	($tag, $span, $data) = extract_sgml_tag_and_span ("rel_mention_arg", $data);
    }
    @arguments = sort {$a->{ARGNUM} <=> $b->{ARGNUM}} @arguments;
    return @arguments;
}

#################################

sub get_relation_mention_argument {

    my ($data) = @_;
    my ($tag, $span, $num, %argument);

    return undef unless ($tag, $span) = extract_sgml_tag_and_span ("rel_mention_arg", $data);

    $argument{ARGNUM} = extract_sgml_tag_attribute ("ARGNUM", $tag) or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    No ARGNUM attribute.  Relation data is '$data'\n\n";
    $argument{MENTIONID} = extract_sgml_tag_attribute ("ENTITYMENTIONID", $tag) or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    No argument ENTITYMENTIONID attribute.  Relation data is '$data'\n\n";
    $argument{extent} = get_locator ("extent", $span) or die
	"\n\nFATAL INPUT ERROR for relation '$input_relation' in document '$input_doc' in file '$input_file'\n".
	    "    No argument extent.  Relation data is '$data'\n\n";
    return {%argument};
}

#################################

sub extract_sgml_span {
    
    my ($name, $data) = @_;
    
    return () unless defined $name and defined $data;
    return () unless ($data =~ /<$name(\s+[^>]*)*>(.*)/si);
    my $tag = $1;
    my $remainder = $2;
    return (undef, $remainder) if ($tag and $tag =~ /\/$/s);
    ($data =~ /<$name(\s+[^>]*)*>(.*?)<\/$name\s*>(.*)/si) ? ($2, $3) : ();
}

#################################

sub extract_sgml_tag_and_span {
    
    my ($name, $data) = @_;
    
    return () unless defined $name and defined $data;
    return () unless ($data =~ /<$name(\s+[^>]*)*>(.*)/si);
    my $tag = $1;
    my $remainder = $2;
    return ($tag, undef, $remainder) if ($tag and $tag =~ /\/$/s);
    ($data =~ /<$name(\s+[^>]*)*>(.*?)<\/$name\s*>(.*)/si) ? ($1, $2, $3) : ();
}

#################################

sub extract_sgml_tag_attribute {

    my ($name, $data) = @_;

    return () unless defined $name and defined $data;
    my $attr = ($data =~ /(^|\s)$name\s*=\s*\"\s*([^\"]*)\s*\"/si) ? ($2) : ();
    return () if not defined $attr or $attr =~ /^\s*$/;
    $attr =~ s/^\s*|\s*$//g;
    return $attr;
}

#################################

sub extract_sgml_tag_attributes {

    my ($data) = @_;
    my %attributes;

    return () unless defined defined $data;
    while ($data =~ s/(^|\s+)([^\s]+)\s*=\s*\"\s*([^\"]*)\s*\"//si) {
	my $type = uc $2;
	my $attr = $3;
	$attr =~ s/^\s*|\s*$//g;
	next if not defined $attr or $attr =~ /^\s*$/;
	$attributes{$type} = $attr if length $attr;
    }
    return {%attributes};
}

#################################

sub date_time_stamp {

    my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime();
    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my ($date, $time);

    $time = sprintf "%2.2d:%2.2d:%2.2d", $hour, $min, $sec;
    $date = sprintf "%4.4s %3.3s %s", 1900+$year, $months[$mon], $mday;
    return ($date, $time);
}

#################################

sub print_document_data {

    my ($type, $documents) = @_;
    my ($doc_id, $doc);

    print "\n======== $type documents ========\n\n";
    foreach $doc_id (sort keys %$documents) {
	$doc = $documents->{$doc_id};
	print "doc ID=$doc_id, ";
	print "TYPE=$doc->{TYPE}, ";
	print "FILE=$doc->{FILE}\n";
    }
}

#################################

sub print_entity_data {

    my ($type, $db) = @_;
    my ($entity_id);

    print "\n======== $type entities ========\n\n";
    foreach $entity_id (sort keys %{$db->{entities}}) {
	print_entity ($db->{entities}{$entity_id});
    }
}

#################################

sub print_entity {

    my ($entity) = @_;

    my ($name, $title, $doc, $doc_info, $attribute, $mention);
    my $total_value;
    foreach $doc (keys %{$entity->{documents}}) {
	my $doc_entity = $entity->{documents}{$doc};
	($doc_entity->{VALUE}) = entity_document_value($doc_entity);
	$total_value += $doc_entity->{VALUE};
    }
    printf "entity ID=$entity->{ID}, VALUE=%.5f", $total_value;
    foreach $attribute (@entity_attributes) {
	next if $attribute eq "ID";
	print ", $attribute=$entity->{$attribute}" if defined $entity->{$attribute};
    }
    print "\n";
    foreach $name (@{$entity->{names}}) {
	print "  name=\"$name\"\n";
    }
    foreach $title (@{$entity->{titles}}) {
	print "  title=\"$title\"\n";
    }
    foreach $doc (sort keys %{$entity->{documents}}) {
	$doc_info = $entity->{documents}{$doc};
	print "    -- in document $doc\n";
	foreach $mention (sort sort_on_locator @{$doc_info->{mentions}}) {
	    print "      mention TYPE=$mention->{TYPE}, ROLE=$mention->{ROLE}, STYLE=$mention->{STYLE}, ";
	    printf "head=\"%s\", ", defined $mention->{head}{text} ? $mention->{head}{text} : "???";
	    printf "extent=\"%s\"\n", defined $mention->{extent}{text} ? $mention->{extent}{text} : "???";
	}
	foreach $name (sort sort_on_locator @{$doc_info->{names}}) {
	    printf "      name extent=\"%s\"\n", defined $name->{text} ? $name->{text} : "???";
	}
    }
}

#################################

sub print_relation_data {

    my ($type, $db) = @_;
    my ($relation_id, $relation, $arg, $time);
    my ($doc_id, $doc, $doc_info);
    my ($attribute, $mention, $narg, $entity);

    print "\n======== $type relations  ========\n\n";
    foreach $relation_id (sort keys %{$db->{relations}}) {
	$relation = $db->{relations}{$relation_id};
	print "relation ID=$relation->{ID}";
	foreach $attribute (@relation_attributes) {
	    next if $attribute eq "ID";
	    print ", $attribute=$relation->{$attribute}" if defined $relation->{$attribute};
	}
	print "\n";
	foreach $time (@{$relation->{times}}) {
	    printf "  time info:%s\n", relation_time_info($time);
	}
	$narg=0;
	foreach $arg (@{$relation->{arguments}}) {
	    $entity = $db->{entities}{$arg->{ID}};
	    print "  ".relation_arg_description (++$narg, $entity);
	}
	print_relation_mentions ($relation->{documents});
    }
}

#################################

sub get_database {

    my ($file) = @_;
    my ($line, $data, $tag, $span);
    my (%entity, $ndb_entities, $attribute);

#read data from file
    open (FILE, $file) or die "\nUnable to open ACE database file '$file'";
    while ($line=<FILE>){
	$data .= $line;
    }
    close (FILE);

#get entity data for all entities in the database file
    while (($tag, $span, $data) = extract_sgml_tag_and_span ("entity", $data)) {
	foreach $attribute (@entity_attributes) {
	    next if $attribute eq "ORIGIN";
	    $entity{$attribute} = extract_sgml_tag_attribute ($attribute, $tag);
	}
	$entity{CLASS} = "SPECIFIC" unless defined $entity{CLASS};
	$entity{names} = [get_entity_attribute_vector ($span, "name")];
	$entity{titles} = [get_entity_attribute_vector ($span, "title")];
	$entity{ID} or die
	    "\n\nFATAL INPUT ERROR:  no entity ID found in tag '$tag' in database file '$file'\n\n";
	$entity{TYPE} or die
	    "\n\nFATAL INPUT ERROR:  no entity TYPE found in tag '$tag' in database file '$file'\n\n";
	not $ref_database{entities}{$entity{ID}} or die
	    "\n\nFATAL INPUT ERROR:  entity ID '$entity{ID}' in database file '$file' is not unique\n\n";
	($entity{TYPE}) = normalize_entity_type($entity{TYPE});
	($ref_database{entities}{$entity{ID}}) = {%entity};
	$ref_database{entities}{$entity{ID}}{ORIGIN} = "DATABASE"; #flag this entity as a database entity
	$ref_database{entities}{$entity{ID}}{VALUE} = 
	    entity_value($ref_database{entities}{$entity{ID}}, $ref_database{entities}{$entity{ID}});
	$ndb_entities++;
    }
    $ndb_entities or warn
	"\n\nWARNING:  database file '$file' contains no entities\n\n";
}

#################################
    
sub normalize_entity_type {

    my ($type) = @_;
    $type =~ s/^\s*|\s*$//g;
    return $normalize_entity_type{substr($type,0,3)};
}


#################################
    
sub normalize_entity_class {

    my ($class) = @_;
    $class =~ s/^\s*|\s*$//g;
    return $normalize_entity_class{substr($class,0,3)};
}

#################################
    
sub get_entity_attribute_vector {

    my ($entity_data, $attribute) = @_;
    my $span;
    my (%attribute, @attributes);
    my ($tag, $data);
    
    ($span) = extract_sgml_span ("entity_attributes", $entity_data);

    while ($span and (($tag, $data, $span) = 
	   extract_sgml_tag_and_span ($attribute, $span)))
    {
	push @attributes, $data;
    }
    return @attributes;
}

#################################

sub weighted_bipartite_graph_matching {
    my ($score) = @_;
    
    my $INF = 1E30;
    my (@row_mate, @col_mate, @row_dec, @col_inc);
    my (@parent_row, @unchosen_row, @slack_row, @slack);
    my ($k, $l, $row, $col, @col_min, $cost, %cost);
    my $t = 0;
    
    unless (defined $score) {
	warn "input to BGM is undefined\n";
	return undef;
    }
    return {} if (keys %$score) == 0;
    
    my @rows = sort keys %{$score};
    my $miss = "miss";
    $miss .= "0" while exists $score->{$miss};
    my (@cols, %cols);
    my $min_score = $INF;
    foreach $row (@rows) {
	foreach $col (keys %{$score->{$row}}) {
	    $min_score = min($min_score,$score->{$row}{$col});
	    $cols{$col} = $col;
	}
    }
    @cols = sort keys %cols;
    my $fa = "fa";
    $fa .= "0" while exists $cols{$fa};
    my $reverse_search = @rows < @cols; # search is faster when ncols <= nrows
    foreach $row (@rows) {
	foreach $col (keys %{$score->{$row}}) {
	    ($reverse_search ? $cost{$col}{$row} : $cost{$row}{$col})
		= $score->{$row}{$col} - $min_score;
	}
    }
    push @rows, $miss;
    push @cols, $fa;
    if ($reverse_search) {
	my @xr = @rows;
	@rows = @cols;
	@cols = @xr;
    }

    my $nrows = @rows;
    my $ncols = @cols;
    my $nmax = max($nrows,$ncols);
    my $no_match_cost = -$min_score*(1+$required_precision);

    # subtract the column minimas
    for ($l=0; $l<$nmax; $l++) {
	$col_min[$l] = $no_match_cost;
	next unless $l < $ncols;
	$col = $cols[$l];
	foreach $row (keys %cost) {
	    next unless defined $cost{$row}{$col};
	    my $val = $cost{$row}{$col};
	    $col_min[$l] = $val if $val < $col_min[$l];
	}
    }
    
    # initial stage
    for ($l=0; $l<$nmax; $l++) {
	$col_inc[$l] = 0;
	$slack[$l] = $INF;
    }
    
  ROW:
    for ($k=0; $k<$nmax; $k++) {
	$row = $k < $nrows ? $rows[$k] : undef;
	my $row_min = $no_match_cost;
	for (my $l=0; $l<$ncols; $l++) {
	    my $col = $cols[$l];
	    my $val = ((defined $row and defined $cost{$row}{$col}) ? $cost{$row}{$col}: $no_match_cost) - $col_min[$l];
	    $row_min = $val if $val < $row_min;
	}
	$row_dec[$k] = $row_min;
	for ($l=0; $l<$nmax; $l++) {
	    $col = $l < $ncols ? $cols[$l]: undef;
	    $cost = ((defined $row and defined $col and defined $cost{$row}{$col}) ?
		     $cost{$row}{$col} : $no_match_cost) - $col_min[$l];
	    if ($cost==$row_min and not defined $row_mate[$l]) {
		$col_mate[$k] = $l;
		$row_mate[$l] = $k;
                # matching row $k with column $l
		next ROW;
	    }
	}
	$col_mate[$k] = -1;
	$unchosen_row[$t++] = $k;
    }
    
    goto CHECK_RESULT if $t == 0;
    
    my $s;
    my $unmatched = $t;
    # start stages to get the rest of the matching
    while (1) {
	my $q = 0;
	
	while (1) {
	    while ($q < $t) {
		# explore node q of forest; if matching can be increased, update matching
		$k = $unchosen_row[$q];
		$row = $k < $nrows ? $rows[$k] : undef;
		$s = $row_dec[$k];
		for ($l=0; $l<$nmax; $l++) {
		    if ($slack[$l]>0) {
			$col = $l < $ncols ? $cols[$l]: undef;
			$cost = ((defined $row and defined $col and defined $cost{$row}{$col}) ?
				 $cost{$row}{$col} : $no_match_cost) - $col_min[$l];
			my $del = $cost - $s + $col_inc[$l];
			if ($del < $slack[$l]) {
			    if ($del == 0) {
				goto UPDATE_MATCHING unless defined $row_mate[$l];
				$slack[$l] = 0;
				$parent_row[$l] = $k;
				$unchosen_row[$t++] = $row_mate[$l];
			    }
			    else {
				$slack[$l] = $del;
				$slack_row[$l] = $k;
			    }
			}
		    }
		}
		
		$q++;
	    }
	    
	    # introduce a new zero into the matrix by modifying row_dec and col_inc
	    # if the matching can be increased update matching
	    $s = $INF;
	    for ($l=0; $l<$nmax; $l++) {
		if ($slack[$l] and ($slack[$l]<$s)) {
		    $s = $slack[$l];
		}
	    }
	    for ($q = 0; $q<$t; $q++) {
		$row_dec[$unchosen_row[$q]] += $s;
	    }
	    
	    for ($l=0; $l<$nmax; $l++) {
		if ($slack[$l]) {
		    $slack[$l] -= $s;
		    if ($slack[$l]==0) {
			# look at a new zero and update matching with col_inc uptodate if there's a breakthrough
			$k = $slack_row[$l];
			unless (defined $row_mate[$l]) {
			    for (my $j=$l+1; $j<$nmax; $j++) {
				if ($slack[$j]==0) {
				    $col_inc[$j] += $s;
				}
			    }
			    goto UPDATE_MATCHING;
			}
			else {
			    $parent_row[$l] = $k;
			    $unchosen_row[$t++] = $row_mate[$l];
			}
		    }
		}
		else {
		    $col_inc[$l] += $s;
		}
	    }
	}
	
      UPDATE_MATCHING:  # update the matching by pairing row k with column l
	while (1) {
	    my $j = $col_mate[$k];
	    $col_mate[$k] = $l;
	    $row_mate[$l] = $k;
            # matching row $k with column $l
	    last UPDATE_MATCHING if $j < 0;
	    $k = $parent_row[$j];
	    $l = $j;
	}
	
	$unmatched--;
	goto CHECK_RESULT if $unmatched == 0;
	
	$t = 0;  # get ready for another stage
	for ($l=0; $l<$nmax; $l++) {
	    $parent_row[$l] = -1;
	    $slack[$l] = $INF;
	}
	for ($k=0; $k<$nmax; $k++) {
	    $unchosen_row[$t++] = $k if $col_mate[$k] < 0;
	}
    }  # next stage
    
  CHECK_RESULT:  # rigorously check results before handing them back
    for ($k=0; $k<$nmax; $k++) {
	$row = $k < $nrows ? $rows[$k] : undef;
	for ($l=0; $l<$nmax; $l++) {
	    $col = $l < $ncols ? $cols[$l]: undef;
	    $cost = ((defined $row and defined $col and defined $cost{$row}{$col}) ?
		     $cost{$row}{$col} : $no_match_cost) - $col_min[$l];
	    if ($cost < ($row_dec[$k] - $col_inc[$l])) {
		next unless $cost < ($row_dec[$k] - $col_inc[$l]) - $required_precision*max(abs($row_dec[$k]),abs($col_inc[$l]));
		warn "BGM: this cannot happen: cost{$row}{$col} ($cost) cannot be less than row_dec{$row} ($row_dec[$k]) - col_inc{$col} ($col_inc[$l])\n";
		return undef;
	    }
	}
    }
    
    for ($k=0; $k<$nmax; $k++) {
	$row = $k < $nrows ? $rows[$k] : undef;
	$l = $col_mate[$k];
	$col = $l < $ncols ? $cols[$l]: undef;
	$cost = ((defined $row and defined $col and defined $cost{$row}{$col}) ?
		 $cost{$row}{$col} : $no_match_cost) - $col_min[$l];
	if (($l<0) or ($cost != ($row_dec[$k] - $col_inc[$l]))) {
	    next unless $l<0 or abs($cost - ($row_dec[$k] - $col_inc[$l])) > $required_precision*max(abs($row_dec[$k]),abs($col_inc[$l]));
	    warn "BGM: every row should have a column mate: row $row doesn't, col: $col\n";
	    return undef;
	}
    }
    
    my %map;
    for ($l=0; $l<@row_mate; $l++) {
	$k = $row_mate[$l];
	$row = $k < $nrows ? $rows[$k] : undef;
	$col = $l < $ncols ? $cols[$l]: undef;
	next unless defined $row and defined $col and defined $cost{$row}{$col};
	$reverse_search ? ($map{$col} = $row) : ($map{$row} = $col);
    }
    return {%map};
}