#!/usr/bin/perl -w
use strict;

#################################
# History:
#
# version 15: 26 Sept 2003
#    * corrected a false error message
#
# version 14: 24 Sept 2003
#    * uems to exclude any LEXEME of subtype un-lex
#
# version 13: 23 Sept 2003
#    * always merge adjacent IPs
#
# version 12: 11 Sept 2003
#    * bug fix
#
# version 11: 28 Aug 2003
#    * renamed program as  ag-to-rttm+uems  reflecting new functionality
#    * added code to allow the program to use a list of AG "split files",
#         so that it inhales all the split files, and treats them all as
#         one AG file (basically by renaming all the annotations/anchors)
#    * added flag -I to specify that the program is to use a list of
#         AG "split files" rather than the name of an AG file
#    * added code to generate the scoring UEM file for speaker diarization
#         and the scoring UEM file for disfluencies and SU's.  Options
#         -d and -s specify the names of these files
#    * added flag -x to have the scoring UEM files exclude overlap regions
#    # added flag -v to generate verbose progress messages, mostly about
#         the generation of the scoring UEM files
#
# version 10: 25 July 2003
#    * flag zero duration tokens
#    * save the command line to the output file as comment
#
# version 9:  21 July 2003
#    * added 'unannotated' as a valid SU subtype
#
# version 8:  8 July 2003
#    * changed unknown gender label from 'other' to 'unknown'
#    * changed metadata subtype 'explicitEditingTerm' to 'explicit_editing_term'
#    * took out debug print statement
#    * fixed defective sort_sus subroutine
#
# version 7:  7 July 2003
#    * added code to change channel value from 'ALL' to '1'
#    * fixed script to produce the appropriate lexeme class for a given lexeme
#      if such info exists
#    * made tokens with alpha subtype to have a period '.' after them
#    * made confidence score field to be required, output <NA> if we don't
#      have it
#    * normalized orthography of breath, cough, laughter, lipsmack, and sneeze noises
#    * undid the change to make SU to contain only tokens that are not
#      annotated as noRTMetadata
#    * fixed defective sort_ips subroutine
#
# version 6:  25 June 2003
#    * fixed script to allow output of rttms to be optional
#    * added command line switch to normalize SWB1 transcription format
#      (i.e. 'because_1' to 'because')
#    * converted XML symbolics to normal characters
#      (i.e. '&amp;' to '&')
#    * changed SU to contain only tokens that are not annotated as
#      noRTMetadata
#
# version 5:  23 June 2003
#    * debug IP creation
#
# version 4:  18 June 2003
#    * debug extract_stt_object
#    * debug/improve output sort
#    * expand LEXEME subtypes
#
# version 3:  16 June 2003
#    * STT token type/subtype reconciled with MAIA documentation
#    * STT token category/text interpreted to match CTM subtypes
#    * SEGMENT records now accommodate speaker name
#    * field documentation added in leading comments
#
# version 2:  13 June 2003
#    * basic debug complete
#
# version 1:  11 June 2003
#    * rough draft
#
#################################
#GLOBAL VARIABLES

my $dt_MaxSpkrMerge = 0.3; #maximum between-lex gap to merge speaker segments together
my $dt_MaxIPMerge   = 0.3; #maximum between-IP gap to merge two IP's into one.
my %speaker_data;
my $begNoscore;   # explicit noscore region in AG file
my $endNoscore;   # explicit noscore region in AG file
my $segID          = "SG0000";
my $lexID          = "LX0000";
my $nlxID          = "NL0000";
my $nspID          = "NS0000";
my $flrID          = "FL0000";
my $edtID          = "ED0000";
my $ipID           = "IP0000";
my $suID           = "SU0000";
my $apID           = "AP0000";
my $spkrID         = "SP0000";
my $noscoreID      = "ZP0000";
my $noRTMetadataID = "NR0000";

my %sort_order = ("NOSCORE"         =>  0,
		  "NO_RT_METADATA"  =>  1,
		  "SEGMENT"         =>  2,
		  "SPEAKER"         =>  3,
		  "SU"              =>  4,
		  "A/P"             =>  5,
		  "CB"              =>  6,
		  "IP"              =>  7,
		  "EDIT"            =>  8,
		  "FILLER"          =>  9,
		  "NON-SPEECH"      => 10,
		  "NON-LEX"         => 11,
		  "LEXEME"          => 12,
		  "SPKR-INFO"       => 13,
		  "SUboundary"      => 14);

#################################

my ($date, $time) = date_time_stamp();
print "ag-to-rttm+uems run on $date at $time\n";

my $commandline = join(" ", @ARGV);
print "command line:  ", $0, " ", $commandline, "\n";

my $usage = "\n\n$0 <options> -i <AG input file> -o <RTTM file> -s <RTTMS file>\n\n".
    "Description:  This Perl program converts EARS speech annotation\n".
    "    from AG XML format to RTTM format and creates a scoring uem file\n".
    "    for speaker-diarization and one for SU and disfluency scoring.\n". 
    "\n".
    "Options:\n".
    "  -c converts SWB1 transcription format to SNOR format.\n".
    "  -x excludes overlap regions from the scoring uem file.\n".
    "  -n includes the [non-standard] NOSCORE records in the rttm file\n".
    "  -h prints this help message.\n".
    "  -v causes verbose progress messages to be printed to STDOUT.\n".
    "Required arguments:\n".
    "  -i <file> is an AG file.\n".
    "  -I <file> is a text file listing multiple AG files, one per line.\n".
    "\t  ... This option is intended to be used for a split ag file.\n".
    "  -r <file> is the RTTM output filename.\n".
    "  -d <file> is the disfluency and SU scoring-UEM output filename.\n".
    "  -s <file> is the speaker-diarization scoring-UEM output filename.\n";

#################################
#MAIN

{
    use vars qw ($opt_i $opt_I $opt_d $opt_r $opt_s $opt_h $opt_c $opt_n $opt_x $opt_v);
    use Getopt::Std;
    getopts ('hcxnvd:i:I:r:s:');
    die $usage if defined($opt_h);
    $opt_i or $opt_I or die "\n\nFATAL ERROR: undefined AG input file$usage";
    $opt_d or die "\n\nFATAL ERROR: undefined SU and disfluency scoring UEM output file$usage";
    $opt_s or die "\n\nFATAL ERROR: undefined speaker-diarization scoring-UEM output file$usage";
    $opt_r or die "\n\nFATAL ERROR: undefined RTTM output file$usage";

    my ($data, %signals, %times, %annotations, %channels, $spkr, $rttm);
    my @spkr_uemsegs = ();
    my @disfl_uemsegs = ();

#get AG data
    if ($opt_I) {
        open AGLIST, $opt_I or die "\n\nFATAL ERROR:  unable to open AG file list '$opt_I'$usage";
        my $aglist_idx = 0;
        while (my $curr_ag = <AGLIST>) {
            chomp $curr_ag;
            next if $curr_ag =~ /^$/;
            print "Processing $curr_ag\n" if $opt_v;
            open DATA, $curr_ag or die "\n\n$usage\nFATAL ERROR:  unable to open AG file '$curr_ag'";
            $data = ();
            $data .= $_ while <DATA>;
            close DATA;

            # Extract AG data
            get_ag_signals ($aglist_idx, \%signals, $data);
            print "\t have signals\n" if $opt_v;
            get_ag_times ($aglist_idx, \%times, $data);
            print "\t have times\n" if $opt_v;
            get_ag_annotations ($aglist_idx, \%annotations, $data);
            print "\t have annotations\n" if $opt_v;
            normalize_swb1_text (\%annotations) if $opt_c;
            print "\t text has been normalized\n" if $opt_c and $opt_v;
            $aglist_idx += 1;
        }
        close AGLIST;
    } else {
        open DATA, $opt_i or die "\n\nFATAL ERROR:  unable to open AG file '$opt_i'$usage";
        $data .= $_ while <DATA>;
        close DATA;

        # Extract AG data
        get_ag_signals (0, \%signals, $data);
        print "\t have signals\n" if $opt_v;
        get_ag_times (0, \%times, $data);
        print "\t have times\n" if $opt_v;
        get_ag_annotations (0, \%annotations, $data);
        normalize_swb1_text (\%annotations) if $opt_c;
        print "\t text has been normalized\n" if $opt_c and $opt_v;
   }

#format RTTM data
    create_pseudo_times (\%signals, \%times, \%annotations);
    print "Have created pseudo-times.  Doing ag_to_rttm\n" if $opt_v;
    check_token_durations(\%times, \%annotations);
    $rttm = [ag_to_rttm (\%signals, \%times, \%annotations, \%channels)];
    print "Have collected rttm data\n" if $opt_v;
    @$rttm = sort sort_objects @$rttm;
    print "Have sorted the rttm data\n" if $opt_v;

    foreach $spkr (keys %speaker_data) {
        print "\t$spkr\n" if $opt_v;
	add_IPs ($spkr, $rttm);
        print "\t\thave IPs\n" if $opt_v;
	add_SUs ($spkr, $rttm);
        print "\t\thave SUs\n" if $opt_v;
        add_speaker_diarization ($spkr, $rttm);
        print "\t\thave diarization\n" if $opt_v;
    }

    @$rttm = sort sort_objects @$rttm;
    print "Have sorted the rttm data again\n" if $opt_v;

    foreach my $chnl (keys %channels) {
        generate_scoring_uems ($rttm, \@spkr_uemsegs, \@disfl_uemsegs, $chnl);
    }
    print "Have generated the scoring uem data\n" if $opt_v;


#output RTTM data
    print "Putting out rttm file\n" if $opt_v;
    output_rttm_object_file ($rttm, $opt_r);
    print "\tDone putting out rttm file.\n" if $opt_v;
#output scoring-UEM files
    print "Putting out speaker-diarization scoring-uem file\n" if $opt_v;
    output_speaker_diarization_scoring_uem_file(@spkr_uemsegs);
    print "Done putting out speaker-diarization scoring-uem file\n" if $opt_v;
    print "Putting out disfluency-and-su scoring-uem file\n" if $opt_v;
    output_disfluency_and_su_scoring_uem_file(@disfl_uemsegs);
    print "Done putting out speaker-diarization scoring-uem file\n" if $opt_v;
    print "Program exiting normally.\n";
}


#################################

sub output_speaker_diarization_scoring_uem_file {
    open DATA, ">$opt_s" or die "\n\nFATAL ERROR:  unable to open scoring UEM output file $opt_s";
    foreach my $uemseg (sort sort_uems @_) {
        print "Spkr seg\n" if $opt_v;
        print "\tfile $uemseg->{file}\n" if $opt_v;
        print "\tchnl $uemseg->{chnl}\n" if $opt_v;
        print "\ttbeg $uemseg->{tbeg}\n" if $opt_v;
        print "\ttend $uemseg->{tend}\n" if $opt_v;
        print "\ttdur $uemseg->{tdur}\n" if $opt_v;

        printf DATA "$uemseg->{file} $uemseg->{chnl} %.3f %.3f\n",
              $uemseg->{tbeg}, $uemseg->{tend};
   }
   close DATA;
}


#################################

sub output_disfluency_and_su_scoring_uem_file {
    open DATA, ">$opt_d" or die "\n\nFATAL ERROR:  unable to open scoring UEM output file $opt_d";
    foreach my $uemseg (sort sort_uems @_) {
        print "Disfl seg\n"  if $opt_v;
        print "\tfile $uemseg->{file}\n"  if $opt_v;
        print "\tchnl $uemseg->{chnl}\n"  if $opt_v;
        print "\ttbeg $uemseg->{tbeg}\n"  if $opt_v;
        print "\ttend $uemseg->{tend}\n"  if $opt_v;
        print "\ttdur $uemseg->{tdur}\n"  if $opt_v;

        printf DATA "$uemseg->{file} $uemseg->{chnl} %.3f %.3f\n",
              $uemseg->{tbeg}, $uemseg->{tend};
   }
   close DATA;
}


#################################
sub get_ag_signals {

    my ($id_qual, $signals, $data) = @_;
    my ($tag, $span, $id, $chnl, $file);

    while (($span, $data) = extract_sgml_span ("timeline", $data)) {
	(($tag) = extract_sgml_tag_and_span ("signal", $span))
	    or die "\n\nFATAL ERROR:  no 'signal' in '$span'\n\n";
	(($id) = extract_sgml_tag_attribute ("id", $tag))
	    or die "\n\nFATAL ERROR:  no 'id' in '$tag'\n\n";
        $id .= ":$id_qual";
	(($chnl) = extract_sgml_tag_attribute ("track", $tag))
	    or die "\n\nFATAL ERROR:  no 'track' in '$tag'\n\n";
	$chnl = 1 if ($chnl eq "ALL"); # this is a hack for bad data
	$signals->{$id}{chnl} = $chnl;
	(($file) = extract_sgml_tag_attribute ("xlink:href", $tag))
	    or die "\n\nFATAL ERROR:  no 'xlink:href' in '$tag'\n\n";
	$file =~ s/.*\///;
	$file =~ s/\..*//;
	$signals->{$id}{file} = $file;
    }
    %$signals or die "\n\nFATAL ERROR:  no signals found in get_ag_signals\n\n";
    # return {%signals};
}

#################################

sub get_ag_times {

    my ($id_qual, $times, $data) = @_;
    my ($tag, $span, $id, $time, $signal);

    while (($tag, $span, $data) = extract_sgml_tag_and_span ("anchor", $data)) {
	(($id) = extract_sgml_tag_attribute ("id", $tag))
	    or die "\n\nFATAL ERROR:  no 'id' in '$tag'\n\n";
        $id .= ":$id_qual";
	(($signal) = extract_sgml_tag_attribute ("signals", $tag))
	    or die "\n\nFATAL ERROR:  no 'signals' in '$tag'\n\n";
        $signal .= ":$id_qual";
	$times->{$id}{signal} = $signal;
	($time) = extract_sgml_tag_attribute ("offset", $tag);
	$times->{$id}{time} = $time if defined $time;
    }
    %$times or die "\n\nFATAL ERROR:  no times found in get_ag_times\n\n";
    #return {%times};
}

#################################

sub get_ag_annotations {

    my ($id_qual, $annotations, $data) = @_;
    my ($tag, $span, $id, $type, $start, $end, $name, $feature);

    while (($tag, $span, $data) = extract_sgml_tag_and_span ("annotation", $data)) {
	(($id) = extract_sgml_tag_attribute ("id", $tag))
	    or die "\n\nFATAL ERROR:  no 'id' in '$tag'\n\n";
        $id .= ":$id_qual";
        $annotations->{$id}{id_qual} = $id_qual;
	(($type) = extract_sgml_tag_attribute ("type", $tag))
	    or die "\n\nFATAL ERROR:  no 'type' in '$tag'\n\n";
	$annotations->{$id}{type} = $type;
	(($start) = extract_sgml_tag_attribute ("start", $tag))
	    or die "\n\nFATAL ERROR:  no 'start' in '$tag'\n\n";
        $start .= ":$id_qual";
	$annotations->{$id}{start} = $start;
	(($end) = extract_sgml_tag_attribute ("end", $tag))
	    or die "\n\nFATAL ERROR:  no 'end' in '$tag'\n\n";
        $end .= ":$id_qual";
	$annotations->{$id}{end} = $end;
	while (($tag, $feature, $span) = extract_sgml_tag_and_span ("feature", $span)) {
	    (($name) = extract_sgml_tag_attribute ("name", $tag))
		or die "\n\nFATAL ERROR:  no 'name' in '$tag'\n\n";
	    $annotations->{$id}{features}{$name} = $feature;
	}
    }
    %$annotations or die "\n\nFATAL ERROR:  no annotations found in get_ag_annotations\n\n";
    #return {%annotations};
}

#################################

sub create_pseudo_times {

    my ($signals, $times, $annotations) = @_;
    my ($id, $id_qual, $start, $end, $signal, $tbeg, $tend, $time, $dt, $token, $tokens, @tokens);

#interpolate times for all tokens in all segments
    foreach $id (keys %$annotations) {
        $id_qual = $annotations->{$id}{id_qual};
	next unless $annotations->{$id}{type} eq "segment";
	$start = $annotations->{$id}{start};
	$times->{$start} or
	    die "\n\nFATAL ERROR:  no time info for segment start '$start'\n\n";
	$signal = $times->{$start}{signal};
        $signal .= ":$id_qual";
	$end = $annotations->{$id}{end};
	$times->{$end} or
	    die "\n\nFATAL ERROR:  no time info for segment end '$end'\n\n";
	$signal eq ($times->{$end}{signal} . ":$id_qual") or
	    die "\n\nFATAL ERROR:  inconsistent start/end signals $signal $times->{$end}{signal} :$id_qual for segment '$id'\n\n";
	$tbeg = $times->{$start}{time} or
	    die "\n\nFATAL ERROR:  no begin time for segment '$id'\n\n";
	$tend = $times->{$end}{time} or
	    die "\n\nFATAL ERROR:  no end time for segment '$id'\n\n";
        # segments may or may not have children tokens
        #
	if (exists($annotations->{$id}{features}{_AtlasAnnChil_})) {
	    $tokens = $annotations->{$id}{features}{_AtlasAnnChil_};
	    #$tokens =~ /token/ or
	    #    die "\n\nFATAL ERROR:  children not tokens for segment '$id'\n\n";
	    $tokens =~ s/.*token *//;
	    @tokens = split /\s+/, $tokens;
	    $time = $tbeg;
	    $dt = ($tend-$tbeg)/@tokens;
	    foreach $token (@tokens) {
                $token .= ":$id_qual";
		$annotations->{$token} or
		    die "\n\nFATAL ERROR:  no annotation for token '$token'\n\n";
		$start = $annotations->{$token}{start};
		$times->{$start} or
		    die "\n\nFATAL ERROR:  no time info for token '$token' start '$start'\n\n";
		$signal = $times->{$start}{signal};
		$end = $annotations->{$token}{end};
		$times->{$end} or
		    die "\n\nFATAL ERROR:  no time info for token '$token' end '$end'\n\n";
		$signal eq $times->{$end}{signal} or
		    die "\n\nFATAL ERROR:  inconsistent start/end signals for start '$start' and end '$end'\n\n";
		if (not defined $times->{$start}{time}) {
		    $times->{$start}{time} = $time;
		    $times->{$start}{fake} = "true";
		}
		$time += $dt;
		if (not defined $times->{$end}{time}) {
		    $times->{$end}{time} = $time;
		    $times->{$end}{fake} = "true";
		}
	    }
	}
    }

#check for complete coverage
    foreach $time (keys %$times) {
	defined $times->{$time}{time} or
	    die "\n\nFATAL ERROR:  time not defined for anchor '$time'\n\n";
    }
}

#################################

sub check_token_durations {
    my ($times, $annotations) = @_;
    my ($id, $start, $end, $tbeg, $tend, $fatal);
    $fatal = 0;
    foreach $id (keys %$annotations) {
	next unless $annotations->{$id}{type} eq "token";
	$start = $annotations->{$id}{start};
	$end = $annotations->{$id}{end};
	$times->{$start} or
	    die "\n\nFATAL ERROR:  no time info for token start '$start'\n\n";
	$times->{$end} or
	    die "\n\nFATAL ERROR:  no time info for token end '$end'\n\n";
	$tbeg = $times->{$start}{time} or
	    die "\n\nFATAL ERROR:  no begin time for token '$id'\n\n";
	$tend = $times->{$end}{time} or
	    die "\n\nFATAL ERROR:  no end time for token '$id'\n\n";
	if ($tend - $tbeg <= 0.0001) {
	    # print all the errors and then die later
	    $annotations->{$id}{features}{text} ? print "FATAL ERROR:  token '$annotations->{$id}{features}{text}' at '$tbeg' has zero duration\n" : print "FATAL ERROR:  token at '$tbeg' has zero duration\n";
	    $fatal = 1;
	}
    }
    if ($fatal) { die };
}

#################################

sub get_id_qual {
   my ($id) = @_;
   my @fields = split(/:/, $id);
   return @fields[@fields - 1];
}

#################################

sub ag_to_rttm {

    my ($signals, $times, $annotations, $channels) = @_;
    my ($id, $annotation, $type, $features, $object, $start, $end, $signal, @objects);

#add speaker information
    foreach $id (keys %$annotations) {
	$annotation = $annotations->{$id};
	$type = $annotation->{type};
	propagate_speaker_info (get_id_qual($id), $annotations, $annotation) if $type eq "speaker";
    }
    print "Done with speaker info\n" if $opt_v;

#flag unclear transcriptions
    foreach $id (keys %$annotations) {
	$annotation = $annotations->{$id};
	$type = $annotation->{type};
	propagate_unclear_info (get_id_qual($id), $annotations, $annotation) if $type =~ /^(unclear|questionableTranscription)$/;
    }
    print "Done with unclear transcriptions\n" if $opt_v;

#flag filled pauses
    foreach $id (keys %$annotations) {
	$annotation = $annotations->{$id};
	$type = $annotation->{type};
	propagate_fp_info (get_id_qual($id), $annotations, $annotation) if $type eq "filledPause";
    }
    print "Done with filled pauses\n" if $opt_v;

#flag no metadata
    foreach $id (keys %$annotations) {
	$annotation = $annotations->{$id};
	$type = $annotation->{type};
	propagate_nometadata_info(get_id_qual($id), $annotations, $annotation) if $type eq "noRTMetadata";
    }
    print "Done with noRTMetadata\n" if $opt_v;

#extract RTTM objects
    foreach $id (keys %$annotations) {
	$annotation = $annotations->{$id};
	$type = $annotation->{type};
	$features = $annotation->{features};
	$object = ($type eq "speaker" ? extract_spkr_object ($annotation->{features}) :
		   ($type eq "noscore" ? extract_noscore_object ($annotation->{features}) :
		    ($type eq "noRTMetadata" ? extract_no_rt_metadata_object ($annotation->{features}) :
		     ($type eq "segment" ? extract_segment_object ($annotation->{features}) :
		      ($type eq "token" ? extract_stt_object ($annotation->{features}) :
		       ($type =~ /^(SU|depod|filledPause|discourseMarker|explicitEditingTerm|aside)$/ ?
		        $object = extract_mde_object ($type, $annotation->{features}) :
		        undef))))));
	next unless $object;
	$start = $annotations->{$id}{start};
	$times->{$start} or die "\n\nFATAL ERROR:  no time info for '$start'\n\n";
	$signal = $times->{$start}{signal};
	$end = $annotations->{$id}{end};
	$times->{$end} or die "\n\nFATAL ERROR:  no time info for '$end'\n\n";
	($signal eq $times->{$end}{signal})
	    or die "\n\nFATAL ERROR:  inconsistent start/end signals for annotation '$id'\n\n";
	$object->{file} = $signals->{$signal}{file};
	$object->{chnl} = $signals->{$signal}{chnl};
	$object->{tbeg} = $times->{$start}{time};
	$object->{tbegValid} = "true" unless $times->{$start}{fake};
	$object->{tend} = $times->{$end}{time};
	$object->{tendValid} = "true" unless $times->{$end}{fake};
	$object->{tdur} = $object->{tend} - $object->{tbeg};
	$object->{features} = $annotations->{$id}{features};
	if ($type eq "speaker") {
	    my $name = $object->{name};
	    $speaker_data{$name}{type} = "SPKR-INFO";
	    $speaker_data{$name}{name} = $name;
	    $speaker_data{$name}{stype} = $object->{stype};
	    $speaker_data{$name}{file} = $object->{file};
	    $speaker_data{$name}{chnl} = $object->{chnl};
            $channels->{$object->{chnl}} = 1;
	    $speaker_data{$name}{features} = $object->{features};
	}
	else {
	    push @objects, $object;
	}
    }
    return @objects;
}

#################################

sub propagate_speaker_info {

    my ($id_qual, $annotations, $parent) = @_;
    my ($children, $spkr, $child, $annotation);

    $children = $parent->{features}{_AtlasAnnChil_};
    return unless $children;
    $spkr = $parent->{features}{speakerID};
    foreach $child (split /\s+/, $children) {
        $child .= ":$id_qual";
	$annotation = $annotations->{$child};
	next unless $annotation;
	$annotation->{features}{speakerID} = $spkr;
	propagate_speaker_info ($id_qual, $annotations, $annotation);
    }
}

#################################

sub propagate_unclear_info {

    my ($id_qual, $annotations, $parent) = @_;
    my ($children, $child, $annotation);

    $children = $parent->{features}{_AtlasAnnChil_};
    return unless $children;
    foreach $child (split /\s+/, $children) {
        $child .= ":$id_qual";
	$annotation = $annotations->{$child};
	next unless $annotation;
	$annotation->{features}{unclear} = "true";
	propagate_unclear_info ($annotations, $annotation);
    }
}

#################################

sub propagate_fp_info {

    my ($id_qual, $annotations, $parent) = @_;
    my ($children, $child, $annotation);

    $children = $parent->{features}{_AtlasAnnChil_};
    return unless $children;
    foreach $child (split /\s+/, $children) {
        $child .= ":$id_qual";
	$annotation = $annotations->{$child};
	next unless $annotation;
	next unless $annotation->{type} eq "token";
	$annotation->{features}{category} = "filledPause";
	propagate_fp_info ($annotations, $annotation);
    }
}

#################################

sub propagate_nometadata_info {

    my ($id_qual, $annotations, $parent) = @_;
    my ($children, $child, $annotation);

    $children = $parent->{features}{_AtlasAnnChil_};
    return unless $children;
    foreach $child (split /\s+/, $children) {
	$annotation = $annotations->{$child};
	next unless $annotation;
	next unless $annotation->{type} eq "token";
	$annotation->{features}{nometadata} = "true";
	propagate_nometadata_info ($annotations, $annotation);
    }
}

#################################

sub extract_spkr_object {

    my ($features) = @_;
    my ($name, $subtype, %object);

    ($name = $features->{speakerID})
	or die "\n\nFATAL ERROR:  speaker annotation without a defined 'speakerID'\n\n";
    #(not $speaker_data{$name})
    #	or die "\n\nFATAL ERROR:  multiple definitions of speakerID '$name'\n\n";
    $subtype = lc $features->{type};
    $subtype = "unknown" if not $subtype;
    $subtype = "adult_male" if $subtype eq "male";
    $subtype = "adult_female" if $subtype eq "female";
    $subtype = "unknown" if $subtype !~ /^(adult_male|adult_female|child)$/;
    $object{type} = "SPKR-INFO";
    $object{stype} = $subtype;
    $object{name} = $name;
    return {%object};
}

#################################

sub extract_noscore_object {

    my (%object);

    $object{type} = "NOSCORE";
    $object{id} = ++$noscoreID;
    return {%object};
}

#################################

sub extract_no_rt_metadata_object {

    my (%object);

    $object{type} = "NO_RT_METADATA";
    $object{id} = ++$noRTMetadataID;
    return {%object};
}

#################################

sub extract_segment_object {

    my ($features) = @_;
    my (%object);

    $object{type} = "SEGMENT";
    $object{id} = ++$segID;
    $object{name} = $features->{speakerID};
    return {%object};
}

#################################

sub extract_stt_object {

    my ($features) = @_;
    my ($text, $category, $subtype, $type, %object);

    $text = lc $features->{text};
    $category = lc $features->{category};
    ($type, $subtype) =
	((not $category)                                                  ? ("LEXEME"    , "lex"         ) :
	 ($category =~ /(^|;\s*)filledpause/                              ? ("LEXEME"    , "fp"          ) :
	  ($category =~ /(^|;\s*)(prefragment|postfragment)/              ? ("LEXEME"    , "frag"        ) :
	   ($category =~ /(^|;\s*)(mispronounced|idiosyncratic|laughter)/ ? ("LEXEME"    , "un-lex"      ) :
	    ($category =~ /(^|;\s*)foreign/                               ? ("LEXEME"    , "for-lex"     ) :
	     ($category =~ /(^|;\s*)spokenletter/                         ? ("LEXEME"    , "alpha"       ) :
	      ($category =~ /(^|;\s*)acronym/                             ? ("LEXEME"    , "acronym"     ) :
	       ($category =~ /(^|;\s*)interjection/                       ? ("LEXEME"    , "interjection") :
		($category =~ /(^|;\s*)propername/                        ? ("LEXEME"    , "propername"  ) :
		 ($category =~ /(^|;\s*)vocalnoise/  ?
                      (($text =~ /^(laughter|laugh|laughing)/     ? ("NON-LEX"   , "laugh"   ) :
			($text =~ /^breath/                       ? ("NON-LEX"   , "breath"  ) :
			 ($text =~ /^lip-?smack/                  ? ("NON-LEX"   , "lipsmack") :
			  ($text =~ /^(cough|coughs|coughing)/    ? ("NON-LEX"   , "cough"   ) :
			   ($text =~ /^(sneeze|sneezing)/         ? ("NON-LEX"   , "sneeze"  ) :
			    ("NON-LEX"   , "other"))))))) :
		  ($category =~ /(^|;\s*)nonvocalnoise/  ? ("NON-SPEECH", "noise") :
		   ($category =~ /(^|;\s*)nonvocalnoise/ ? ("NON-SPEECH", "music") :
		    ("NON-LEX"   , "other",
		     warn "WARNING:  unknown category ($category) for token $text\n")))))))))))));
    $subtype = "un-lex" if $subtype eq "lex" and $features->{unclear};
    $object{type} = $type;
    $object{ortho} = $features->{text} if $features->{text};
    $object{stype} = $subtype;
    $object{name} = $features->{speakerID} if $type ne "NON-SPEECH";
    $object{id} = (($type eq "LEXEME")   ? ++$lexID :
		   (($type eq "NON-LEX") ? ++$nlxID :
		                           ++$nspID));
    normalize_text(\%object) if $features->{text};
    return {%object};
}

#################################

sub extract_mde_object {

    my ($mdetype, $features) = @_;
    my ($type, $subtype, %object);

    if ($mdetype eq "SU") {
	$type = "SUboundary";
	$subtype = $features->{type};
	($subtype =~ /^(statement|question|backchannel|incomplete|clausal|coordinating|unannotated)$/)
	    or die "\n\nFATAL ERROR:  illegal SU subtype ($subtype)\n\n";
	$object{id} = ++$suID;
    }
    elsif ($mdetype eq "depod") {
	$type = "EDIT";
	$subtype = $features->{comment};
	$subtype =
	    ((not $subtype)             ? "simple"     :
	     ($subtype =~ /restart/     ? "restart"    :
	      ($subtype =~ /repetition/ ? "repetition" :
	       ($subtype =~ /revision/  ? "revision"   :
		($subtype =~ /complex/  ? "complex"    :
		                          "simple")))));
	$object{id} = ++$edtID;
    }
    elsif ($mdetype =~ /filledPause|discourseMarker|explicitEditingTerm/) {
	$type = "FILLER";
	$subtype = 
	    ($mdetype =~ /filledPause/      ? "filled_pause"     :
	     ($mdetype =~ /discourseMarker/ ? "discourse_marker" :
                                              "explicit_editing_term"));
	$object{id} = ++$flrID;
    }
    elsif ($mdetype eq "aside") {
	$type = "A/P";
	$subtype = "<NA>";
	$object{id} = ++$apID;
    }
     
    $object{type} = $type;
    $object{stype} = $subtype;
    $object{name} = $features->{speakerID};
    return {%object};
}

#################################

sub add_IPs {

    my ($spkr, $rttm) = @_;
    my ($object, $ip, $ipp, $prev_edit, @ipdata, @ips);

    foreach $object (@$rttm) {
	push @ipdata, $object if $object->{name} and $object->{name} eq $spkr;
    }
    @ipdata = sort sort_ips @ipdata;

    foreach $object (@ipdata) {
	if ($object->{type} =~ /FILLER|EDIT/) {
	    undef $ip;
	    $ip->{type} = "IP";
	    $ip->{file} = $object->{file};
	    $ip->{chnl} = $object->{chnl};
	    $ip->{name} = $spkr;
	    $ip->{id} = ++$ipID;
	    if ($object->{type} eq "FILLER") {
		$ipp = $ips[@ips-1] if $prev_edit;
  	      # if ( $prev_edit and $object->{tbeg} 
              #      < $ipp->{tbeg}+$dt_MaxIPMerge ) {
		if ($prev_edit) {
		    $ipp->{stype} = "edit&filler";
		  # $ipp->{tbeg} = ($ipp->{tbeg} + $object->{tbeg})/2;
		}
		else {
		    $ip->{stype} = "filler";
		    $ip->{tbeg} = $ip->{tend} = $object->{tbeg};
		    $ip->{tbegValid} = $ip->{tendValid} = $object->{tbegValid};
		    push @ips, $ip;
		}
	    }
	    else {
		$ip->{stype} = "edit";
		$ip->{tbeg} = $ip->{tend} = $object->{tend};
		$ip->{tbegValid} = $ip->{tendValid} = $object->{tendValid};
		push @ips, $ip;
		$prev_edit = 1;
	    }
	}
      # elsif ($object->{type} =~ /LEXEME|NON-LEX/) {
	elsif ($object->{type} =~ /LEXEME|SUboundary/) {
	    undef $prev_edit;
	}
    }
    push @$rttm, @ips if @ips;
}

#################################

sub sort_ips {
    
    return (ip_time($a) < ip_time($b)-0.0001 ? -1 :
	    (ip_time($a) > ip_time($b)+0.0001 ?  1 :
             $sort_order{$a->{type}} <=> $sort_order{$b->{type}}));
}

#################################

sub ip_time {
    
    my ($object) = @_;
    return $object->{type} =~ /EDIT/ ? $object->{tend} : $object->{tbeg};
}

#################################

sub add_SUs {

    my ($spkr, $rttm) = @_;
    my ($object, $su, $cb, @sudata, @sus);

    foreach $object (@$rttm) {
	next unless $object->{name};
	next if $object->{name} ne $spkr;
	next unless $object->{type} =~/^(SUboundary|LEXEME)$/;
	push @sudata, $object;
    }
    @sudata = sort sort_sus @sudata;

    foreach $object (@sudata) {
	if ($object->{type} eq "LEXEME") {
	    if ($su) {
		$su->{tend} = $object->{tend};
		$su->{tendValid} = $object->{tendValid};
		$su->{tdur} = $su->{tend} - $su->{tbeg};
	    }
	    else {
		$su->{type} = "SU";
		$su->{file} = $object->{file};
		$su->{chnl} = $object->{chnl};
		$su->{tbeg} = $object->{tbeg};
		$su->{tbegValid} = $object->{tbegValid};
		$su->{tend} = $object->{tend};
		$su->{tendValid} = $object->{tendValid};
		$su->{tdur} = $su->{tend} - $su->{tbeg};
		$su->{name} = $spkr;
		$su->{id} = ++$suID;
	    }
	}
	elsif ($object->{stype} =~ /coordinating|clausal/) {
	    undef $cb;
	    $cb->{type} = "CB";
	    $cb->{file} = $object->{file};
	    $cb->{chnl} = $object->{chnl};
	    $cb->{tbeg} = $cb->{tend} = $object->{tend};
	    $cb->{tbegValid} = $cb->{tendValid} = $object->{tendValid};
	    $cb->{stype} = $object->{stype};
	    $cb->{name} = $spkr;
	    $cb->{id} = ++$suID;
	    push @sus, $cb;
	    $su or warn "WARNING:  clausal boundary at '$cb->{tbeg}' is not inside an SU\n";
	}
	elsif ($su) {
	    $su->{stype} = $object->{stype};
	    push @sus, $su;
	    undef $su;
	}
	else {
	    warn "WARNING:  SU from object of type $object->{type} ending at '$object->{tend}' for spkr '$spkr' contains no lexemes and will be discarded\n";
	}
    }
    (not $su) or warn "WARNING:  dangling LEXEMEs at end of file with no enclosing SU\n";
    push @$rttm, @sus if @sus;
}

#################################

sub sort_sus {

    return  1 if $a->{type} eq "SUboundary" and $a->{tend} > $b->{tend}-0.0001;
    return -1 if $b->{type} eq "SUboundary" and $b->{tend} > $a->{tend}-0.0001;
    return  1 if $a->{tbeg} > $b->{tend}-0.0001;
    return -1 if $b->{tbeg} > $a->{tend}-0.0001;
    return 0;
}

#################################

sub sort_uems {

    return  1 if $a->{tbeg} > $b->{tbeg};
    return -1 if $b->{tbeg} > $a->{tbeg};
    return 0;
}

#################################

sub generate_scoring_uems {

    my ($rttm, $spkr_uemsegs, $disfl_uemsegs, $channel) = @_;
    my ($object, $spkr_uemseg, $disfl_uemseg, $tendPrevLex);
    my ($tendCurrSpkrSeg, $tendCurrDisflSeg);
    my ($scoringSuppressed, $suppressAtLeastUntil);    # due to NON-LEX

    $tendPrevLex = 0.0;          # default value
    $tendCurrSpkrSeg = 0.0;          # default value
    $tendCurrDisflSeg = 0.0;          # default value
    $scoringSuppressed = 0;      # i.e., false
    $suppressAtLeastUntil = 0.0; # default value
    undef $begNoscore; 
    undef $endNoscore;

    foreach $object (@$rttm) {
        next unless $channel == $object->{chnl};
        # Generate uem segments
        if ( $object->{type} eq "NOSCORE" or
             $object->{type} eq "NO_RT_METADATA" or
             ($object->{type} eq "SU" and $object->{stype} eq "unannotated") or
             ($object->{type} eq "LEXEME" and $object->{stype} eq "un-lex") or
             ( $object->{type} eq "SEGMENT" and
               $object->{name} eq "unknown" and
               $speaker_data{$object->{name}}{stype} eq "unknown" ) ) {

            if (defined $endNoscore) {
                if ($object->{tbeg} > $endNoscore) {
                    $begNoscore = $object->{tbeg};
                }
            } else {
                $begNoscore = $object->{tbeg};
            }

            if (defined $endNoscore) {
                if ($endNoscore < $object->{tend}) {
                    $endNoscore = $object->{tend};
                }
            } else {
                $endNoscore = $object->{tend};
            }
	    if ($spkr_uemseg) {
                if ( $spkr_uemseg->{tbeg} < $begNoscore and
                     $spkr_uemseg->{tend} > $begNoscore and
                     $spkr_uemseg->{tend} <= $endNoscore ) { 
                    $spkr_uemseg->{tend} = $begNoscore;
                    if (defined $spkr_uemseg->{tbeg}) {
	    	        push @$spkr_uemsegs, $spkr_uemseg;
                    }
		    undef $spkr_uemseg;
                } elsif ( $spkr_uemseg->{tend} > $endNoscore and
                          $spkr_uemseg->{tbeg} > $begNoscore and
                          $spkr_uemseg->{tbeg} <= $endNoscore ) { 
                    $spkr_uemseg->{tbeg} = $endNoscore;
                } elsif ( $spkr_uemseg->{tbeg} < $begNoscore and
                          $spkr_uemseg->{tend} > $endNoscore ) {
                    my $savdEnd = $spkr_uemseg->{tend};
                    my $savdFile = $spkr_uemseg->{file};
                    my $savdChnl = $spkr_uemseg->{chnl}; 
                    $spkr_uemseg->{tend} = $begNoscore;
                    if (defined $spkr_uemseg->{tbeg}) {
	    	        push @$spkr_uemsegs, $spkr_uemseg;
                    }
                    undef $spkr_uemseg;
                    $spkr_uemseg->{file} = $savdFile;
                    $spkr_uemseg->{chnl} = $savdChnl;
                    $spkr_uemseg->{tbeg} = $endNoscore;
                    $spkr_uemseg->{tend} = $savdEnd;
                    $spkr_uemseg->{tdur} =
                                $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                } elsif ($spkr_uemseg->{tend} <= $begNoscore) {
                    if (defined $spkr_uemseg->{tbeg}) {
	    	        push @$spkr_uemsegs, $spkr_uemseg;
                    }
		    undef $spkr_uemseg;
                }
            }
	    if ($disfl_uemseg) {
                if ( $disfl_uemseg->{tbeg} < $begNoscore and
                     $disfl_uemseg->{tend} > $begNoscore and
                     $disfl_uemseg->{tend} <= $endNoscore ) { 
                    $disfl_uemseg->{tend} = $begNoscore;
	    	    push @$disfl_uemsegs, $disfl_uemseg;
		    undef $disfl_uemseg;
                } elsif ( $disfl_uemseg->{tend} > $endNoscore and
                          $disfl_uemseg->{tbeg} > $begNoscore and
                          $disfl_uemseg->{tbeg} <= $endNoscore ) { 
                    $disfl_uemseg->{tbeg} = $endNoscore;
                } elsif ( $disfl_uemseg->{tbeg} < $begNoscore and
                          $disfl_uemseg->{tend} > $endNoscore ) {
                    my $savdEnd = $disfl_uemseg->{tend};
                    my $savdFile = $disfl_uemseg->{file};
                    my $savdChnl = $disfl_uemseg->{chnl}; 
                    $disfl_uemseg->{tend} = $begNoscore;
                    if (defined $disfl_uemseg->{tbeg}) {
	    	        push @$disfl_uemsegs, $disfl_uemseg;
                    }
                    undef $disfl_uemseg;
                    $disfl_uemseg->{file} = $savdFile;
                    $disfl_uemseg->{chnl} = $savdChnl;
                    $disfl_uemseg->{tbeg} = $endNoscore;
                    $disfl_uemseg->{tend} = $savdEnd;
                    $disfl_uemseg->{tdur} =
                                $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                } elsif ($disfl_uemseg->{tend} <= $begNoscore) {
                    if (defined $disfl_uemseg->{tbeg}) {
	    	        push @$disfl_uemsegs, $disfl_uemseg;
                    }
		    undef $disfl_uemseg;
                }
            }
        } elsif ($object->{type} =~ /SEGMENT|LEXEME|NON-LEX|NON-SPEECH/) {
	    if ($spkr_uemseg) { 
                if ($object->{type} =~ /NON-LEX|NON-SPEECH/) {
                    if (not defined $spkr_uemseg->{tbeg}) {
                        $suppressAtLeastUntil = $object->{tend};
                    } else {
                        if ($tendPrevLex > $spkr_uemseg->{tbeg}) {
                            $spkr_uemseg->{tend} = $tendPrevLex;
		            $spkr_uemseg->{tdur} =
                                    $tendPrevLex - $spkr_uemseg->{tbeg};
                        }
                        if ($spkr_uemseg->{tbeg} <= $object->{tbeg}) {
                            if ($spkr_uemseg->{tbeg} < $object->{tbeg}) {
                               if ($spkr_uemseg->{tend} > $object->{tbeg}) {
                                   if ($spkr_uemseg->{tend} > $object->{tend}) {
                                       my $savdFile = $spkr_uemseg->{file};
                                       my $savdChnl = $spkr_uemseg->{chnl};
                                       my $savdTend = $spkr_uemseg->{tend};
                                       $spkr_uemseg->{tend} = $object->{tbeg};
	    	                       push @$spkr_uemsegs, $spkr_uemseg;
		                       undef $spkr_uemseg;
                                       $spkr_uemseg->{file} = $savdFile;
                                       $spkr_uemseg->{chnl} = $savdChnl;
                                       $spkr_uemseg->{tbeg} = $object->{tend};
                                       $spkr_uemseg->{tend} = $savdTend;
                                   } else {
                                       $spkr_uemseg->{tend} = $object->{tbeg};
                                       push @$spkr_uemsegs, $spkr_uemseg;
		                       undef $spkr_uemseg;
                                   }
                               } else {
	    	                   push @$spkr_uemsegs, $spkr_uemseg;
		                   undef $spkr_uemseg;
                               }
                            } elsif ($spkr_uemseg->{tend} > $object->{tend}) {
                               $spkr_uemseg->{tbeg} = $object->{tend};
                            } else {
		               undef $spkr_uemseg;
                            }
                            $scoringSuppressed = 1;  # i.e., true
                            $suppressAtLeastUntil = $object->{tend};
                        }
                    }
	        }
	        elsif ($object->{type} eq "SEGMENT") {
                    ## Note that this is a SEGMENT that did not match
                    ## the initial "if" -- see the NOSCORE and NO_RTMETADATA
                    ## logic above
                    if ($tendCurrSpkrSeg > $object->{tbeg}) {
                        # we have an overlap
                        # Note that "overlap" is defined by the SEGMENTs
                        if ($spkr_uemseg->{tbeg} < $object->{tbeg}) {
                            if ($opt_x) {
                                # exclude overlap region from the uem file
                                my $endOverlap = $spkr_uemseg->{tend};
                                my $savdFile = $spkr_uemseg->{file};
                                my $savdChnl = $spkr_uemseg->{chnl}; 
                                $spkr_uemseg->{tend} = $object->{tbeg}; #start of overlap
                                $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
	    	                push @$spkr_uemsegs, $spkr_uemseg;
                                undef $spkr_uemseg;
                                if (defined $endNoscore) {
                                    if ($object->{tend} > $endNoscore) {
                                        $spkr_uemseg->{file} = $savdFile;
                                        $spkr_uemseg->{chnl} = $savdChnl;
		                        $spkr_uemseg->{tbeg} =
                                                $endOverlap > $endNoscore ?
                                                $endOverlap :
                                                $endNoscore;
                                        $spkr_uemseg->{tend} = $object->{tend};
                                    }
                                   #else the segment ends within noscore region
                                   # ... and we leave spkr_uemseg undef'd
                                } else {
                                    $spkr_uemseg->{file} = $savdFile;
                                    $spkr_uemseg->{chnl} = $savdChnl;
		                    $spkr_uemseg->{tbeg} = $endOverlap;
                                    $spkr_uemseg->{tend} = $object->{tend};
                                    $spkr_uemseg->{tdur} =
                                            $object->{tend} - $endOverlap;
                                }
                            } else {   # keep overlap region in the uem file
                                if (defined $begNoscore and defined $endNoscore) {
                                    if ($object->{tend} > $endNoscore) {
                                        $spkr_uemseg->{file} = $object->{file};
                                        $spkr_uemseg->{chnl} = $object->{chnl};
                                        if ($begNoscore < $object->{tbeg}) {
                                            if ($object->{tbeg} <= $endNoscore) {
                                                $spkr_uemseg->{tbeg} = $endNoscore;
                                            }
                                        } 
                                        if (not defined $spkr_uemseg->{tbeg}) {
                                            $spkr_uemseg->{tbeg} = $object->{tbeg};
                                        }
                                        $spkr_uemseg->{tend} = $object->{tend};
		                        $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                                    } elsif ($object->{tbeg} < $begNoscore) {
                                        if (not defined $spkr_uemseg->{tbeg}) {
                                            $spkr_uemseg->{tbeg} = $object->{tbeg}
                                        }
                                        $spkr_uemseg->{tend} = $begNoscore;
		                        $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                                    } else {
                                        undef $spkr_uemseg;  # all no-scored
                                        next;
                                    }
                                } else {
                                        if (not defined $spkr_uemseg->{tbeg}) {
                                            $spkr_uemseg->{tbeg} = $object->{tbeg}
                                        }
                                        if ($object->{tend} > $tendCurrSpkrSeg) {
		                            $tendCurrSpkrSeg = $object->{tend};
                                        }
                                        $spkr_uemseg->{tend} = $tendCurrSpkrSeg;
		                        $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                                }
                            }
                        }
                    } else {   # not an overlap situation
                        if (defined $begNoscore and defined $endNoscore) {
                            if ($object->{tend} > $endNoscore) {
                                $spkr_uemseg->{file} = $object->{file};
                                $spkr_uemseg->{chnl} = $object->{chnl};
                                if ($begNoscore < $object->{tbeg}) {
                                    if ($object->{tbeg} <= $endNoscore) {
                                        $spkr_uemseg->{tbeg} = $endNoscore;
                                    }
                                } 
                                if (not defined $spkr_uemseg->{tbeg}) {
                                    $spkr_uemseg->{tbeg} = $object->{tbeg};
                                }
                                $spkr_uemseg->{tend} = $object->{tend};
		                $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                            } elsif ($object->{tbeg} < $begNoscore) {
                                if (not defined $spkr_uemseg->{tbeg}) {
                                    $spkr_uemseg->{tbeg} = $object->{tbeg}
                                }
                                $spkr_uemseg->{tend} = $begNoscore;
		                $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                            } else {
                                undef $spkr_uemseg;  # all no-scored
                                next;
                            }
                        } else {
                                if (not defined $spkr_uemseg->{tbeg}) {
                                    $spkr_uemseg->{tbeg} = $object->{tbeg}
                                }
                                if ($object->{tend} > $tendCurrSpkrSeg) {
		                    $tendCurrSpkrSeg = $object->{tend};
                                }
                                $spkr_uemseg->{tend} = $tendCurrSpkrSeg;
		                $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                        }
                    }
              
                    if ($tendCurrSpkrSeg < $object->{tend}) {
                        $tendCurrSpkrSeg = $object->{tend};
                    }
                    if (defined $begNoscore and defined $endNoscore) {
                        if ( $spkr_uemseg->{tbeg} < $begNoscore and
                             $spkr_uemseg->{tend} > $begNoscore and
                             $spkr_uemseg->{tend} <= $endNoscore ) { 
                            $spkr_uemseg->{tend} = $begNoscore;
	    	            push @$spkr_uemsegs, $spkr_uemseg;
		            undef $spkr_uemseg;
                        } elsif ( $spkr_uemseg->{tend} > $endNoscore and
                                  $spkr_uemseg->{tbeg} > $begNoscore and
                                  $spkr_uemseg->{tbeg} < $endNoscore ) { 
                            $spkr_uemseg->{tbeg} = $endNoscore;
                        } elsif ( $spkr_uemseg->{tbeg} < $begNoscore and
                                $spkr_uemseg->{tend} > $endNoscore ) {
                           my $savdEnd = $spkr_uemseg->{tend};
                           my $savdFile = $spkr_uemseg->{file};
                           my $savdChnl = $spkr_uemseg->{chnl}; 
                           $spkr_uemseg->{tend} = $begNoscore;
	    	           push @$spkr_uemsegs, $spkr_uemseg;
                           undef $spkr_uemseg;
                           $spkr_uemseg->{file} = $savdFile;
                           $spkr_uemseg->{chnl} = $savdChnl;
                           $spkr_uemseg->{tbeg} = $endNoscore;
                           $spkr_uemseg->{tend} = $savdEnd;
                           $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                        } elsif ( $spkr_uemseg->{tbeg} >= $begNoscore
                                  and $spkr_uemseg->{tend} <= $endNoscore ) {
                           undef $spkr_uemseg;
                        }
                    } 
                    if ($scoringSuppressed == 1) {
                        if ((not defined $begNoscore and not defined $endNoscore)
                            or $object->{tbeg} >= $endNoscore
                            or $object->{tend} <= $begNoscore) {
                           if ($object->{tend} > $suppressAtLeastUntil) {
                               if ($object->{tbeg} >= $suppressAtLeastUntil) {
                                   $spkr_uemseg->{tbeg} = $object->{tbeg};
                               } else {
                                   $spkr_uemseg->{tbeg} = $suppressAtLeastUntil;
                               }
                               $scoringSuppressed = 0;  # i.e., false
                           }
                        }
                    }
	        }
                if ($object->{type} =~ /LEXEME/) {
                    $tendPrevLex = $object->{tend};
                    if ($scoringSuppressed == 1) {
                        if ((not defined $begNoscore and not defined $endNoscore)
                            or $object->{tbeg} >= $endNoscore
                            or $object->{tend} <= $begNoscore) {
                           if ($object->{tend} > $suppressAtLeastUntil) {
                               if ($object->{tbeg} >= $suppressAtLeastUntil) {
                                   $spkr_uemseg->{tbeg} = $object->{tbeg};
                               } else {
                                   $spkr_uemseg->{tbeg} = $suppressAtLeastUntil;
                               }
                               $scoringSuppressed = 0;  # i.e., false
                           }
                        }
                    }
                }
            }
	    if ($disfl_uemseg) { 
	        if ($object->{type} eq "SEGMENT") {
                    if ($tendCurrDisflSeg > $object->{tbeg}) {
                        # we have an overlap
                        # Note that "overlap" is defined by the SEGMENTs
                        if ($disfl_uemseg->{tbeg} < $object->{tbeg}) {
                            if ($opt_x) {
                                # exclude overlap region from the uem file
                                my $endOverlap = $disfl_uemseg->{tend};
                                my $savdFile = $disfl_uemseg->{file};
                                my $savdChnl = $disfl_uemseg->{chnl}; 
                                $disfl_uemseg->{tend} = $object->{tbeg}; #start of overlap
                                $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
	    	                push @$disfl_uemsegs, $disfl_uemseg;
                                undef $disfl_uemseg;
                                $disfl_uemseg->{file} = $savdFile;
                                $disfl_uemseg->{chnl} = $savdChnl;
		                $disfl_uemseg->{tbeg} = $endOverlap;

                                $disfl_uemseg->{tend} = $object->{tend};
                                $disfl_uemseg->{tdur} = $object->{tend} - $endOverlap;
                            } else {   # keep overlap region in the uem file
                                if (defined $begNoscore and defined $endNoscore) {
                                    if ($object->{tend} > $endNoscore) {
                                        $disfl_uemseg->{file} = $object->{file};
                                        $disfl_uemseg->{chnl} = $object->{chnl};
                                        if ($begNoscore < $object->{tbeg}) {
                                            if ($object->{tbeg} <= $endNoscore) {
                                                $disfl_uemseg->{tbeg} = $endNoscore;
                                            }
                                        } 
                                        if (not defined $disfl_uemseg->{tbeg}) {
                                            $disfl_uemseg->{tbeg} = $object->{tbeg};
                                        }
                                        $disfl_uemseg->{tend} = $object->{tend};
		                        $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                                    } elsif ($object->{tbeg} < $begNoscore) {
                                        if (not defined $disfl_uemseg->{tbeg}) {
                                            $disfl_uemseg->{tbeg} = $object->{tbeg}
                                        }
                                        $disfl_uemseg->{tend} = $begNoscore;
		                        $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $spkr_uemseg->{tbeg};
                                    } else {
                                        undef $disfl_uemseg;  # all no-scored
                                        next;
                                    }
                                } else {
                                        if (not defined $disfl_uemseg->{tbeg}) {
                                            $disfl_uemseg->{tbeg} = $object->{tbeg}
                                        }
                                        if ($object->{tend} > $tendCurrDisflSeg) {
		                            $tendCurrDisflSeg = $object->{tend};
                                        }
                                        $disfl_uemseg->{tend} = $tendCurrDisflSeg;
		                        $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                                }
                            }
                        }
                    } else {   # not an overlap situation
                        if (defined $begNoscore and defined $endNoscore) {
                            if ($object->{tend} > $endNoscore) {
                                $disfl_uemseg->{file} = $object->{file};
                                $disfl_uemseg->{chnl} = $object->{chnl};
                                if ($begNoscore < $object->{tbeg}) {
                                    if ($object->{tbeg} <= $endNoscore) {
                                        $disfl_uemseg->{tbeg} = $endNoscore;
                                    }
                                } 
                                if (not defined $disfl_uemseg->{tbeg}) {
                                    $disfl_uemseg->{tbeg} = $object->{tbeg};
                                }
                                $disfl_uemseg->{tend} = $object->{tend};
		                $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                            } elsif ($object->{tbeg} < $begNoscore) {
                                if (not defined $disfl_uemseg->{tbeg}) {
                                    $disfl_uemseg->{tbeg} = $object->{tbeg}
                                }
                                $disfl_uemseg->{tend} = $begNoscore;
		                $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $spkr_uemseg->{tbeg};
                            } else {
                                undef $disfl_uemseg;  # all no-scored
                                next;
                            }
                        } else {
                                if (not defined $disfl_uemseg->{tbeg}) {
                                    $disfl_uemseg->{tbeg} = $object->{tbeg}
                                }
                                if ($object->{tend} > $tendCurrDisflSeg) {
		                    $tendCurrDisflSeg = $object->{tend};
                                }
                                $disfl_uemseg->{tend} = $tendCurrDisflSeg;
		                $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                        }
                    }

                    if ($tendCurrDisflSeg < $object->{tend}) {
                        $tendCurrDisflSeg = $object->{tend};
                    }
                    if (defined $begNoscore and defined $endNoscore) {
                        if ( $disfl_uemseg->{tbeg} < $begNoscore and
                             $disfl_uemseg->{tend} > $begNoscore and
                             $disfl_uemseg->{tend} <= $endNoscore ) { 
                            $disfl_uemseg->{tend} = $begNoscore;
	    	            push @$disfl_uemsegs, $disfl_uemseg;
		            undef $disfl_uemseg;
                        } elsif ( $disfl_uemseg->{tend} > $endNoscore and
                                  $disfl_uemseg->{tbeg} > $begNoscore and
                                  $disfl_uemseg->{tbeg} < $endNoscore ) { 
                            $disfl_uemseg->{tbeg} = $endNoscore;
                        } elsif ( $disfl_uemseg->{tbeg} < $begNoscore and
                                $disfl_uemseg->{tend} > $endNoscore ) {
                           my $savdEnd = $disfl_uemseg->{tend};
                           my $savdFile = $disfl_uemseg->{file};
                           my $savdChnl = $disfl_uemseg->{chnl}; 
                           $disfl_uemseg->{tend} = $begNoscore;
	    	           push @$disfl_uemsegs, $disfl_uemseg;
                           undef $disfl_uemseg;
                           $disfl_uemseg->{file} = $savdFile;
                           $disfl_uemseg->{chnl} = $savdChnl;
                           $disfl_uemseg->{tbeg} = $endNoscore;
                           $disfl_uemseg->{tend} = $savdEnd;
                           $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                        } elsif ( $disfl_uemseg->{tbeg} >= $begNoscore
                                  and $disfl_uemseg->{tend} <= $endNoscore ) {
                           undef $disfl_uemseg;
                        }
                    } 
	        }
	    }

            if (not $disfl_uemseg
                and $object->{type} =~ /SEGMENT/) {

                if (not (defined $begNoscore
                         and defined $endNoscore
                         and $object->{tbeg} >= $begNoscore
                         and $object->{tend} <= $endNoscore)) {
	            $disfl_uemseg->{file} = $object->{file};
	            $disfl_uemseg->{chnl} = $object->{chnl};
                    if ($tendCurrDisflSeg < $object->{tend}) {
                        $tendCurrDisflSeg = $object->{tend};
                    }
                    if (defined $begNoscore and defined $endNoscore) {
	                if ($object->{tbeg} < $begNoscore
                            and $object->{tend} > $begNoscore) {
                           $disfl_uemseg->{tend} = $begNoscore;
                        } else {
                           $disfl_uemseg->{tend} = $object->{tend};
                        }
	                if ($object->{tend} > $endNoscore
                            and $object->{tbeg} < $endNoscore) {
                           $disfl_uemseg->{tbeg} = $endNoscore;
                        } else {
                           $disfl_uemseg->{tbeg} = $object->{tbeg};
                        }
                    } else {
                        $disfl_uemseg->{tbeg} = $object->{tbeg};
                        $disfl_uemseg->{tend} = $object->{tend};
                    }
	            $disfl_uemseg->{tdur} =
                            $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                    if (defined $begNoscore and defined $endNoscore) {
                        if ( $disfl_uemseg->{tbeg} < $begNoscore and
                             $disfl_uemseg->{tend} > $begNoscore and
                             $disfl_uemseg->{tend} <= $endNoscore ) { 
                            $disfl_uemseg->{tend} = $begNoscore;
	        	        push @$disfl_uemsegs, $disfl_uemseg;
    	        	        undef $disfl_uemseg;
                        } elsif ( $disfl_uemseg->{tend} > $endNoscore and
                                  $disfl_uemseg->{tbeg} > $begNoscore and
                                  $disfl_uemseg->{tbeg} < $endNoscore ) { 
                            $disfl_uemseg->{tbeg} = $endNoscore;
                        } elsif ( $disfl_uemseg->{tbeg} < $begNoscore and
                                $disfl_uemseg->{tend} > $endNoscore ) {
                           my $savdEnd = $disfl_uemseg->{tend};
                           my $savdFile = $disfl_uemseg->{file};
                           my $savdChnl = $disfl_uemseg->{chnl}; 
                           $disfl_uemseg->{tend} = $begNoscore;
	    	           push @$disfl_uemsegs, $disfl_uemseg;
                           undef $disfl_uemseg;
                           $disfl_uemseg->{file} = $savdFile;
                           $disfl_uemseg->{chnl} = $savdChnl;
                           $disfl_uemseg->{tbeg} = $endNoscore;
                           $disfl_uemseg->{tend} = $savdEnd;
                           $disfl_uemseg->{tdur} = $disfl_uemseg->{tend} - $disfl_uemseg->{tbeg};
                        } elsif ( $disfl_uemseg->{tbeg} >= $begNoscore
                                  and $disfl_uemseg->{tend} <= $endNoscore ) {
                           undef $disfl_uemseg;
                        }
                    }
                }
	    } ## END if not $disfl_uemseg

            if (not $spkr_uemseg
                and ($object->{type} =~ /SEGMENT|LEXEME/)) {
	        $spkr_uemseg->{file} = $object->{file};
	        $spkr_uemseg->{chnl} = $object->{chnl};
                if ($object->{type} eq "SEGMENT") {
                    if (not (defined $begNoscore
                             and defined $endNoscore
                             and $object->{tbeg} >= $begNoscore
                             and $object->{tend} <= $endNoscore)) {

                        if ($tendCurrSpkrSeg < $object->{tend}) {
                            $tendCurrSpkrSeg = $object->{tend};
                        }
                        if (defined $begNoscore and defined $endNoscore) {
	                    if ($object->{tbeg} < $begNoscore
                                and $object->{tend} > $begNoscore) {
                               $spkr_uemseg->{tend} = $begNoscore;
                            } else {
                               $spkr_uemseg->{tend} = $object->{tend};
                            }
	                    if ($object->{tend} > $endNoscore
                                and $object->{tbeg} < $endNoscore) {
                               $spkr_uemseg->{tbeg} = $endNoscore;
                            } else {
                               $spkr_uemseg->{tbeg} = $object->{tbeg};
                            }
                        } else {
                            $spkr_uemseg->{tbeg} = $object->{tbeg};
                            $spkr_uemseg->{tend} = $object->{tend};
                        }
	                $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
	                if ($scoringSuppressed == 0) {
                            if (defined $begNoscore and defined $endNoscore) {
                                if ( $spkr_uemseg->{tbeg} < $begNoscore and
                                 $spkr_uemseg->{tend} > $begNoscore and
                                 $spkr_uemseg->{tend} <= $endNoscore ) { 
                                    $spkr_uemseg->{tend} = $begNoscore;
	        	                push @$spkr_uemsegs, $spkr_uemseg;
    	        	                undef $spkr_uemseg;
                                } elsif ( $spkr_uemseg->{tend} > $endNoscore and
                                          $spkr_uemseg->{tbeg} > $begNoscore and
                                          $spkr_uemseg->{tbeg} < $endNoscore ) { 
                                    $spkr_uemseg->{tbeg} = $endNoscore;
                                } elsif ( $spkr_uemseg->{tbeg} < $begNoscore and
                                        $spkr_uemseg->{tend} > $endNoscore ) {
                                   my $savdEnd = $spkr_uemseg->{tend};
                                   my $savdFile = $spkr_uemseg->{file};
                                   my $savdChnl = $spkr_uemseg->{chnl}; 
                                   $spkr_uemseg->{tend} = $begNoscore;
	    	                   push @$spkr_uemsegs, $spkr_uemseg;
                                   undef $spkr_uemseg;
                                   $spkr_uemseg->{file} = $savdFile;
                                   $spkr_uemseg->{chnl} = $savdChnl;
                                   $spkr_uemseg->{tbeg} = $endNoscore;
                                   $spkr_uemseg->{tend} = $savdEnd;
                                   $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                                } elsif ( $spkr_uemseg->{tbeg} >= $begNoscore
                                          and $spkr_uemseg->{tend} <= $endNoscore ) {
                                   undef $spkr_uemseg;
                                }
                            }
                        }
                        #else, since scoringSuppressed,
                        #      $spkr_uemseg->{tbeg} to be filled in when
                        #      see first LEXEME token in segment
                    } else {
                        undef $spkr_uemseg;
                    }
                }
                elsif ($object->{type} =~ /LEXEME/
                       and $object->{tend} <= $tendCurrSpkrSeg) {
                    if ($object->{tend} > $suppressAtLeastUntil ) {
                        $tendPrevLex = $object->{tend};

                        if (defined $begNoscore and defined $endNoscore) {
                            if ($object->{tbeg} < $begNoscore
                                and $tendCurrSpkrSeg > $begNoscore) {
                               $spkr_uemseg->{tend} = $begNoscore;
                            } else {
                               $spkr_uemseg->{tend} = $tendCurrSpkrSeg;
                            }
                        } else {
                            $spkr_uemseg->{tend} = $tendCurrSpkrSeg;
                        }

                        if ($scoringSuppressed == 0) {
                            if (defined $begNoscore and defined $endNoscore) {
                                if ($object->{tend} > $endNoscore
                                    and $object->{tbeg} < $endNoscore) { 
                                   $spkr_uemseg->{tbeg} = $endNoscore;
                                } else {
                                   $spkr_uemseg->{tbeg} = $object->{tbeg};
                                }
                            } else {
                                $spkr_uemseg->{tbeg} = $object->{tbeg};
                            }
	                    $spkr_uemseg->{tdur} =
                                    $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
	                } else { 
                            # scoringSuppressed == 1
                            if ($object->{tbeg} >= $suppressAtLeastUntil) {
                                $spkr_uemseg->{tbeg} = $object->{tbeg};
                            } else {
                                $spkr_uemseg->{tbeg} = $suppressAtLeastUntil;
                            }
                            $scoringSuppressed = 0;  # i.e., false
                            if (defined $begNoscore and defined $endNoscore) {
                                if ( $spkr_uemseg->{tbeg} < $begNoscore and
                                     $spkr_uemseg->{tend} > $begNoscore and
                                     $spkr_uemseg->{tend} <= $endNoscore ) { 
                                    $spkr_uemseg->{tend} = $begNoscore;
	    	                    push @$spkr_uemsegs, $spkr_uemseg;
    	    	                    undef $spkr_uemseg;
                                } elsif ( $spkr_uemseg->{tend} > $endNoscore and
                                          $spkr_uemseg->{tbeg} > $begNoscore and
                                          $spkr_uemseg->{tbeg} < $endNoscore ) {
                                    $spkr_uemseg->{tbeg} = $endNoscore;
                                } elsif ($spkr_uemseg->{tbeg} < $begNoscore and
                                         $spkr_uemseg->{tend} > $endNoscore ) {
                                    my $savdEnd = $spkr_uemseg->{tend};
                                    my $savdFile = $spkr_uemseg->{file};
                                    my $savdChnl = $spkr_uemseg->{chnl}; 
                                    $spkr_uemseg->{tend} = $begNoscore;
	    	                    push @$spkr_uemsegs, $spkr_uemseg;
                                    undef $spkr_uemseg;
                                    $spkr_uemseg->{file} = $savdFile;
                                    $spkr_uemseg->{chnl} = $savdChnl;
                                    $spkr_uemseg->{tbeg} = $endNoscore;
                                    $spkr_uemseg->{tend} = $savdEnd;
                                    $spkr_uemseg->{tdur} = $spkr_uemseg->{tend} - $spkr_uemseg->{tbeg};
                                 } elsif ( $spkr_uemseg->{tbeg} >= $begNoscore
                                      and $spkr_uemseg->{tend} <= $endNoscore ) {
                                    undef $spkr_uemseg;
                                 }
                            }
                        }
                    } elsif ($object->{type} eq "LEXEME" 
                             and $object->{tend} <= $suppressAtLeastUntil) {
                        undef $spkr_uemseg;
                    }
                } else {
                    undef $spkr_uemseg;
                }
	    } ## END if not $spkr_uemseg

            if ( $tendPrevLex == 0.0
                 and $object->{type} =~ /LEXEME/
                 and ( (defined $begNoscore and $object->{tbeg} < $begNoscore)
                       or (defined $endNoscore and $object->{tend} > $endNoscore))) {
                ## this code is not expected to be executed
                print "**** Error ****  tendPrevLex==0.0 at end of generate_spkr_scoring_uem()\n";
                $tendPrevLex = $object->{tend};
            }
	}
    }
    push @$spkr_uemsegs, $spkr_uemseg if $spkr_uemseg;
    push @$disfl_uemsegs, $disfl_uemseg if $disfl_uemseg;
}

#################################

sub add_speaker_diarization {

    my ($spkr, $rttm) = @_;
    my ($object, $spkrseg, @spkrsegs);

    foreach $object (@$rttm) {
        ## Do spkr segs
	next unless $object->{name};
	next if ($object->{name} ne $spkr);
	next unless $object->{type} =~ /LEXEME|NON-LEX/;
	if ($spkrseg) {
	    if ($object->{tbeg} > $spkrseg->{tend}+$dt_MaxSpkrMerge) {
		push @spkrsegs, $spkrseg;
		undef $spkrseg;
	    }
	    else {
		$spkrseg->{tend} = $object->{tend};
		$spkrseg->{tendValid} = $object->{tendValid};
		$spkrseg->{tdur} = $spkrseg->{tend} - $spkrseg->{tbeg};
	    }
	}
	if (not $spkrseg) {
	    $spkrseg->{type} = "SPEAKER";
	    $spkrseg->{id} = ++$spkrID;
	    $spkrseg->{file} = $object->{file};
	    $spkrseg->{chnl} = $object->{chnl};
	    $spkrseg->{tbeg} = $object->{tbeg};
	    $spkrseg->{tbegValid} = $object->{tbegValid};
	    $spkrseg->{tend} = $object->{tend};
	    $spkrseg->{tendValid} = $object->{tendValid};
	    $spkrseg->{tdur} = $object->{tend} - $object->{tbeg};
	    $spkrseg->{name} = $spkr;
	}
    }
    push @spkrsegs, $spkrseg if $spkrseg;
    push @$rttm, @spkrsegs if @spkrsegs;
}

#################################

sub output_rttm_object_file {

    my ($rttm, $file) = @_;
    my ($object, $name);

    open DATA, ">$file" or die "\n\nFATAL ERROR:  unable to open RTTM object output file '$file'$usage";

    print DATA ";;This is an RTTM file.  Each record contains 9 whitespace separated fields:\n";
    print DATA ";; 1:type 2:file 3:chnl 4:tbeg 5:tdur 6:ortho 7:subtype 8:spkrname 9:conf\n";

    foreach $name (sort keys %speaker_data) {
	print_object ($speaker_data{$name});
    }

    @$rttm = sort sort_objects @$rttm;
    foreach $object (@$rttm) {
	print_object($object)
	    if $object->{type} =~ /^(NO_RT_METADATA|SEGMENT|LEXEME|NON-LEX|NON-SPEECH|FILLER|EDIT|IP|SU|CB|A\/P|SPEAKER)$/;
        print_object($object) if ($opt_n and $object->{type} eq "NOSCORE");
    }
    close DATA;
}

#################################

sub print_object {

    my ($object) = @_;

    printf DATA "%-12s %-12s %3s", $object->{type}, $object->{file}, $object->{chnl};
    defined $object->{tbeg} ?
	(printf DATA " %8.3f%s", $object->{tbeg} , ($object->{tbegValid} ? " ":"*")) :
	    (printf DATA " %9s", "<NA> ");
    defined $object->{tdur} ?
	(printf DATA " %6.3f%s", $object->{tdur} , ($object->{tendValid} ? " ":"*")) :
	    (printf DATA " %7s", "<NA> ");
    printf DATA " %-16s %-20s %-20s %s\n",
    ($object->{ortho} ? $object->{ortho} : "<NA>"),
    ($object->{stype} ? $object->{stype} : "<NA>"),
    ($object->{name} ? $object->{name} : "<NA>"),
    ($object->{features}{conf} ? $object->{features}{conf} : "<NA>");
}

#################################

sub sort_objects {

    return ($a->{tbeg} < $b->{tbeg}-0.0001 ? -1 :
	    ($a->{tbeg} > $b->{tbeg}+0.0001 ?  1 :
	     (($a->{type} =~ /end/ and $b->{type} =~ /beg/) ? -1 :
	      (($a->{type} =~ /beg/ and $b->{type} =~ /end/) ?  1 :
	       $sort_order{$a->{type}} <=> $sort_order{$b->{type}}))));
}

#################################

sub sort_events {

    return ($a->{time} < $b->{time}-0.0001 ? -1 :
	    ($b->{time} < $a->{time}-0.0001 ? 1 :
	     (($a->{object} eq $b->{object}) ? ($a->{type} =~ /beg/ ? -1 : 1) :
	      ($a->{type} =~ /beg/ ? 
	       ($b->{type} =~ /end/ ?  1 : 
		$sort_order{$a->{object}{type}} <=> $sort_order{$b->{object}{type}}) :
	       ($a->{type} =~ /end/ ?
		($b->{type} =~ /beg/ ? -1 :
		 $sort_order{$b->{object}{type}} <=> $sort_order{$a->{object}{type}}) :
		($b->{type} =~ /end/ ? 
		 $sort_order{$b->{object}{type}} <=> $sort_order{$a->{object}{type}} :
		 $sort_order{$a->{object}{type}} <=> $sort_order{$b->{object}{type}}))))));
}

#################################

sub extract_sgml_span {
    
    my ($name, $data) = @_;
    
    return () unless defined $name and defined $data;
    return () unless ($data =~ m|<$name(\s+[^>]*)*>(.*)|si);
    my $tag = $1;
    my $remainder = $2;
    return (undef, $remainder) if ($tag and $tag =~ m|/$|);
    ($data =~ m|<$name(\s+[^>]*)*>(.*?)</$name\s*>(.*)|si) ? ($2, $3) : ();
}

#################################

sub extract_sgml_tag_and_span {
    
    my ($name, $data) = @_;
    
    return () unless defined $name and defined $data;
    return () unless ($data =~ m|<$name(\s+[^>]*)*>(.*)|si);
    my $tag = $1;
    my $remainder = $2;
    return ($tag, undef, $remainder) if ($tag and $tag =~ m|/$|);
    ($data =~ m|<$name(\s+[^>]*)*>(.*?)</$name\s*>(.*)|si) ? ($1, $2, $3) : ();
}

#################################

sub extract_sgml_tag_attribute {

    my ($name, $data) = @_;

    return () unless defined $name and defined $data;
    ($data =~ m|$name\s*=\s*\"([^\"]*)\"|si) ? ($1) : ();
}

#################################

sub date_time_stamp {

    my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime();
    my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my ($date, $time);

    $time = sprintf "%2.2d:%2.2d:%2.2d", $hour, $min, $sec;
    $date = sprintf "%4.4s %3.3s %s", 1900+$year, $months[$mon], $mday;
    return ($date, $time);
}

#################################

sub normalize_text {
    my ($object) = @_;
    if ($object->{ortho}) {
	# add more XML symbolic patterns as needed
	#
	$object->{ortho} =~ s/&amp;/&/;
	
	# make alpha token to have a '.' (period) following it
	# if there isn't one already
	#
	if ($object->{stype} eq "alpha" and
	    $object->{ortho} !~ /[a-zA-Z]\./) {
	    $object->{ortho} =~ s/([a-zA-Z])/$1./;
	}
	
	# make token with preceeding '^' (caret) to have lex class of
	# propername
	#
	if ($object->{stype} eq "lex") {
	    if ($object->{ortho} =~ /\^/) {
		$object->{ortho} =~ s/\^//;
		$object->{stype} = "propernoun";
	    }
	}
    }
}

#################################

sub normalize_swb1_text {

    my ($annotations) = @_;
    my ($id);

    foreach $id (keys %{$annotations}) {
	if ($annotations->{$id}{features}{text}) {
	    # take out the word completion of a fragmented word
	    # i.e. 'sh[e]-' as 'sh' with category as postfragment
	    #      '-[de]rived' as 'rived' with category as prefragment
	    #
	    if ($annotations->{$id}{features}{text} =~ /([-a-z.]+)\[.*?\]-/i) {
		$annotations->{$id}{features}{text} = $1;
		$annotations->{$id}{features}{category} = $annotations->{$id}{features}{category}."postFragment;";
	    } elsif ($annotations->{$id}{features}{text} =~ /-\[.*?\]([-a-z.]+)/i) {
		$annotations->{$id}{features}{text} = $1;
		$annotations->{$id}{features}{category} = $annotations->{$id}{features}{category}."preFragment;";
	    }
	    # replace variant pronunciation words with intended word
	    # i.e. 'because_1' as 'because'
	    #
	    $annotations->{$id}{features}{text} =~ s/_\d+//;
	}
    }
}