#!/usr1/local/bin/perl5 -I/Home/home1/mbene/lib/perl5
#
# Interactively link glossary terms in an HTML file to the glossary page.
#
# Usage: <this script> FILE.html
#
# For every term found in the glossary, occurrences of the term in FILE are
# shown to the user one at a time.  The user may link the occurrence (Y),
# move on to the next occurrence (N), give up on the term (S), or type the
# text that should be linked instead (M).  Terms that FILE already links to
# are left alone.  FILE is rewritten in place only if a link was added.
#
# NOTE(review): the copy of this script that was reviewed had every
# angle-bracketed span stripped out of it (filehandle reads, the anchor
# markup that gets inserted, the end of CreateGlossaryList and the start of
# PromptUser).  Everything marked "reconstructed" below was rebuilt from the
# surrounding code and must be checked against an intact copy.

use strict;
use warnings;
use Term::ANSIColor;

# Glossary source that the list of terms is read from.
my $glossary = "/www/htdocs/imagine/docs/dictionary.html.put.together";

# Reconstructed: page the inserted links point at.  The original value was
# lost; "dictionary.html" is the shortest target that the "already linked"
# check below recognises.  TODO confirm the real path for this site.
my $glossary_url = "dictionary.html";

# Alternative spellings to search for when the glossary anchor itself does
# not occur in the page.  Several are deliberately truncated so that the
# trailing \w* in the search pattern can match inflected forms.
my %alias_for = (
    AGN                    => "active galactic nucle",
    ASM                    => "all sky monitor",
    black_hole_laws        => "black hole dynamic",
    CGRO                   => "compton gamma ray observatory",
    cluster_of_galaxies    => "galaxy cluster",
    Nicolaus_Copernicus    => "Copernicus",
    Doppler_effect         => "doppler",
    Albert_Einstein        => "einstein",
    FFT                    => "Fast Fourier Transformation",
    GRB                    => "gamma-ray burst",
    GMC                    => "giant molecular cloud",
    Edwin_Hubble           => "Hubble",
    William_Herschel       => "Herschel",
    Christiaan_Huygens     => "Huygens",
    ISM                    => "interstellar medium",
    ionized_gas            => "ionic gas",
    Johannes_Kepler        => "Kepler",
    Joseph_Lagrange        => "Lagrange",
    megaton                => "mega-ton",
    Charles_Messier        => "Messier",
    Isaac_Newton           => "Newton",
    # Was "law_of_universal_gravit"; page text has spaces, not underscores,
    # so the underscored form could never match.
    law_of_gravity         => "law of universal gravit",
    QSS                    => "quasi-stellar",
    reflection_law         => "law of reflection",
    Wilhelm_Roentgen       => "Roentgen",
    gravitational_constant => "constant of gravitation",
    Wiens_law              => "Wien",
    em_waves               => "electromagnetic",
    em_spectrum            => "electromagnetic spectrum",
);

# Create a list of all the glossary terms.
my @glossaryList = CreateGlossaryList($glossary);

# Read the file to be searched into $current_file.
my $filename = $ARGV[0];
die "Usage: $0 FILE.html\n" unless defined $filename;

my $current_file      = ReadHtmlFile($filename);
my $string_found_flag = 0;

foreach my $term (@glossaryList) {

    # Nothing to do if the page already links to this glossary entry.
    next if $current_file =~ /dict_\w\w\.html\#\Q$term\E/
         || $current_file =~ /dictionary\.html\#\Q$term\E/;

    my $newTerm = $term;
    $newTerm =~ tr/_/ /;                       # Because text has ' ', not '_'.
    # Drop the last character of longer terms; presumably so that inflected
    # forms ("galaxy"/"galaxies") are still found via the pattern's \w*.
    chop($newTerm) if length($newTerm) > 4;

    # $found: 0 = nothing linked yet, 1 = linked, 2 = user skipped the term.
    my $found = LinkOccurrence(\$current_file, $newTerm, $term);

    # Second attempt: a hand-maintained alternative spelling.
    if ($found == 0 && defined $alias_for{$term}) {
        my $is_new = $alias_for{$term} ne $newTerm;
        $newTerm = $alias_for{$term};
        $found = LinkOccurrence(\$current_file, $newTerm, $term) if $is_new;
    }

    # Last attempt: swap the first hyphen for a space, or the first space
    # for a hyphen ("gamma-ray" <-> "gamma ray").
    if ($found == 0 && $newTerm =~ /(-| )/) {
        if ($1 eq '-') {
            $newTerm =~ s/-/ /;
        }
        else {
            $newTerm =~ s/ /-/;
        }
        $found = LinkOccurrence(\$current_file, $newTerm, $term);
    }

    $string_found_flag = 1 if $found == 1;
}

# Write any changed text back into the exact same filename.
if ($string_found_flag) {
    open(my $out, '>', $filename)
        or die "Cannot open file $filename for output: $!\n";
    print {$out} $current_file
        or die "Cannot write to file $filename: $!\n";
    close($out)
        or die "Cannot close file $filename: $!\n";
}

print "\nDone with $filename\n";

#------------------------------------------------------------------------------------------

# ReadHtmlFile($filename)
# Returns the whole contents of $filename as one string.  Warns and returns
# the empty string when the file is empty, not writeable (it could not be
# saved afterwards) or cannot be opened.
sub ReadHtmlFile {
    my ($filename) = @_;

    if (-z $filename) {
        warn "File $filename has 0 size! \n";
        return "";
    }
    if (!-w $filename) {
        warn "File $filename is not writeable to me! \n";
        return "";
    }

    my $in;
    if (!open($in, '<', $filename)) {
        warn "Cannot open file $filename for input!\n";
        return "";
    }
    my $contents = do { local $/; <$in> };
    close($in);

    return defined $contents ? $contents : "";
}

# LinkOccurrence(\$text, $search, $term)
# Scans $text (case-insensitively) for $search followed by any word
# characters, asks the user about each acceptable occurrence and, on
# approval, wraps it in a link to glossary entry $term.  $text is modified
# in place.  Returns 0 (nothing linked), 1 (linked) or 2 (term skipped).
sub LinkOccurrence {
    my ($text_ref, $search, $term) = @_;
    my $found = 0;

    # Always scan from the top: a previous scan that ended on "skip" leaves
    # pos() in the middle of the text.
    pos($$text_ref) = undef;

    while ($found == 0
        && $$text_ref =~ /[^\#\.\/](\b\Q$search\E)(\w*)(\S*)/isg)
    {
        my $word = $1 . $2;      # the term plus any inflection
        my $tail = $3;           # whatever is glued on after the word
        if ($tail eq "'s") {     # keep a possessive with the word
            $word .= $tail;
            $tail = "";
        }

        my $position = pos($$text_ref) - (length($word) + length($tail));
        # Resume the next search right after the word, not after the tail.
        pos($$text_ref) = $position + length $word;

        # Skip words that are already link text, part of a file name or
        # URL, or directly followed by a quote or slash.
        next if $tail =~ /(<\/a>|\.htm|\.gif|\.jpg|\.tif|^\"|^\/)/i;
        # Very short terms must match exactly, without any inflection.
        next unless length($word) == length($search) || length($search) > 2;
        next if $word =~ /_dtt/;

        ($found, $word) =
            PromptUser($found, $$text_ref, $position, $word, $term);

        if ($found == 1) {
            # Reconstructed: the anchor markup was stripped from the
            # reviewed copy; only "$replacement$word" and a commented-out
            # "$replacement$word</A>" survived.  $word may have been
            # retyped by the user; it replaces the same number of
            # characters starting at $position, as the original splice did.
            my $link = "<A HREF=\"$glossary_url#$term\">$word</A>";
            substr($$text_ref, $position, length $word, $link);
        }
    }

    return $found;
}

# CreateGlossaryList($glossary)
# Returns the list of glossary terms found in the file $glossary, or an
# empty list (with a warning) if the file cannot be opened.
#
# NOTE(review): reconstructed.  Only the open and the start of the read loop
# survived; the code filtered lines with a pattern that is now lost.  Terms
# are used elsewhere as "dictionary.html#TERM" fragments, so this version
# assumes they are the NAME anchors of the glossary page - TODO confirm.
sub CreateGlossaryList {
    my $glossary = shift @_;
    my @list;

    my $in;
    if (!open($in, '<', $glossary)) {
        warn "The file $glossary could not be found.\n";
        return @list;
    }
    while (my $line = <$in>) {
        while ($line =~ /<A\s+NAME\s*=\s*"?([^"\s>]+)/ig) {
            push(@list, $1);
        }
    }
    close($in);

    return @list;
}

# PromptUser($found, $sentence, $position, $word, $term)
# Shows the text around offset $position of $sentence with $word underlined
# and asks whether $word should be linked to glossary entry $term.
# Returns ($found, $word) where $found is
#   1  link it (Y), or link the text the user typed instead (M);
#      in the M case the returned $word is the typed text,
#   0  do not link this occurrence (N),
#   2  stop looking for this term (S, or end of input).
# $found is returned unchanged if $word cannot be located in the excerpt.
#
# NOTE(review): the argument unpacking and the condition choosing the
# excerpt window are reconstructed from the call site and the surviving
# "$length-40" fragment - TODO confirm.
sub PromptUser {
    my ($found, $sentence, $position, $word, $term) = @_;

    # Cut an excerpt around the word: the last stretch of the text when the
    # word is near the end, otherwise 80 characters starting 10 before it.
    my $length = length $sentence;
    if ($position > $length - 40) {
        my $start = $position - 40;
        $start = 0 if $start < 0;    # a negative offset would count from the end
        $sentence = substr($sentence, $start);
    }
    else {
        my $start = $position > 10 ? $position - 10 : 0;
        $sentence = substr($sentence, $start, 80);
    }

    # Keep the excerpt up to the end of the line after the word's line.
    $sentence =~ s/(.*\Q$word\E)([^\n]*\n[^\n]*)(\n[^\n]*)*/$1$2/;

    return ($found, $word) unless $sentence =~ /\Q$word\E/;
    my $before = substr($sentence, 0, $-[0]);
    my $after  = substr($sentence, $+[0]);

    while (1) {
        print "\n\nLink ", colored($word, 'bold'), " with ",
            colored($term, 'bold'), "? The line is: \n\n";
        print $before;
        print color("underline"), $word;
        print color("reset"), $after, "\n\n";
        print "Please enter Y, N, M (modify word), or S (skip word): ";

        my $response = <STDIN>;
        return (2, $word) unless defined $response;    # end of input
        chomp($response);

        if ($response =~ /^Y/i) {
            return (1, $word);
        }
        elsif ($response =~ /^N/i) {
            return (0, $word);
        }
        elsif ($response =~ /^S/i) {
            return (2, $word);
        }
        elsif ($response =~ /^M/i) {
            print "Enter the word to be replaced: ";
            my $typed = <STDIN>;
            return (2, $word) unless defined $typed;   # end of input
            chomp($typed);
            return (1, $typed);
        }
        # Anything else: ask again.
    }
}    # end PromptUser