#!/usr/bin/perl # # #Title : WebSAT_P2001 #File : websat_p2001_single.pl #Author : Charles L. Sheppard # # #WebSAT_P2001 is part of the NIST Web Metrics Testbed developed in the Visualization #and Virtual Reality Group (VVRG) of the Information Technology Laboratory #(ITL) at the National Institute of Standards and Technology (NIST). NIST is #an agency of the US Government, and as such is not subject to copyright. #The underlying HTML parsing engine for websat.pl is code developed by Jim #Davis at Xerox Parc (jdavis@parc.xerox.com). # #Include necessary parameters to handle WEB input. require ("cgi-lib.pl"); use LWP; use URI::URL; #Fills %in with key/value pairs corresponding to form fields### &ReadParse; #Initialize MIME response header print "Content-type:text/html\n\n"; ###Accept user input.### #HTML version $htmlver = $in{'html_version'}; #URL $urldoc = $in{'urldoc'}; #Determine settings for categories if ($in{'headinfo'}) { #Check for head information $headanal{'hi1'} = 1; $headanal{'hi2'} = 1; $headanal{'hi31'} = 1; $headanal{'hi32'} = 1; $headanal{'hi33'} = 1; $headanal{'hi34'} = 1; $headanal{'hi35'} = 1; $headanal{'hi36'} = 1; $headanal{'hi37'} = 1; $headanal{'hi4'} = 1; } else { $headanal{'hi1'} = 0; $headanal{'hi2'} = 0; $headanal{'hi31'} = 0; $headanal{'hi32'} = 0; $headanal{'hi33'} = 0; $headanal{'hi34'} = 0; $headanal{'hi35'} = 0; $headanal{'hi36'} = 0; $headanal{'hi37'} = 0; $headanal{'hi4'} = 0; } if ($in{'bodyinfo'}) { #Check for body information $bodyanal{'bi1.1'} = 1; $bodyanal{'bi1.2'} = 1; $bodyanal{'bi2'} = 1; $bodyanal{'bi3'} = 1; $bodyanal{'bi41'} = 1; $bodyanal{'bi42'} = 1; $bodyanal{'bi43'} = 1; $bodyanal{'bi44'} = 1; $bodyanal{'bi45'} = 1; $bodyanal{'bi46'} = 1; $bodyanal{'bi47'} = 1; $bodyanal{'bi48'} = 1; $bodyanal{'bi5'} = 1; $bodyanal{'bi6'} = 1; $bodyanal{'bi7'} = 1; $bodyanal{'bi8'} = 1; $bodyanal{'bi9'} = 1; $bodyanal{'bi10'} = 1; $bodyanal{'bi11'} = 1; $bodyanal{'bi12'} = 1; $bodyanal{'bi13'} = 1; $bodyanal{'bi14'} = 1; } else { $bodyanal{'bi1.1'} = 0; $bodyanal{'bi1.2'} = 0; $bodyanal{'bi2'} = 0; $bodyanal{'bi3'} = 0; $bodyanal{'bi41'} = 0; $bodyanal{'bi42'} = 0; $bodyanal{'bi43'} = 0; $bodyanal{'bi44'} = 0; $bodyanal{'bi45'} = 0; $bodyanal{'bi46'} = 0; $bodyanal{'bi47'} = 0; $bodyanal{'bi48'} = 0; $bodyanal{'bi5'} = 0; $bodyanal{'bi6'} = 0; $bodyanal{'bi7'} = 0; $bodyanal{'bi8'} = 0; $bodyanal{'bi9'} = 0; $bodyanal{'bi10'} = 0; $bodyanal{'bi11'} = 0; $bodyanal{'bi12'} = 0; $bodyanal{'bi13'} = 0; $bodyanal{'bi14'} = 0; } #Remove whitespaces and verify legitimate URL entry. $urldoc =~ s/\s//g; if ($urldoc =~ m|^(?:(?:http:)?//)?[-a-zA-Z_.0-9]+(?::[0-9]+)?(?:/[-=~:a-zA-Z\$_@!%^&*().0-9+?]*)*$|) { #insure that optional "http://" is there in the end ### $urldoc = "http://" . $urldoc unless ($urldoc =~ /^http:\/\//); #Define parameters needed to capture URL document. ###Instantiate a UserAgent $ua = new LWP::UserAgent; #You might want to add proxy lines similar to the following for #intranet usage. # #$ua->proxy(['http'], 'http://proxy.<2n level domain>.<1st level domain>:/'); #$ua->no_proxy('<2n level domain>.<1st level domain>'); #Instantiate a url object with the complete and valid address $url = new URI::URL($urldoc); #Instantiate a request using the GET method at the above address $request = new HTTP::Request('GET', $url); #pass the request to the userAgent $response = $ua->request($request); #Store returned URL document in arrays for parsing and display. if ($response->is_success) { #Store the returned HTML document as file to be parsed. @htmlfile = split (/\n/, $response->content); @docfile = @htmlfile; #Capture HTML document for later output. $doc_line = 0; #Put back \n character that was removed by the above #split operation. while ($doc_line < @htmlfile) { $htmlfile[$doc_line] .= "\n"; $doc_line++; } } else { print "Error: $response->code $response->message"; exit; } $whitespace_significant = 0; # global variables: $base_url = ""; $absolute_link = 0; $relative_link = 0; $within_doc_link = 0; $new_browser_window = 0; $extern_IMG_absolute = 0; $extern_IMG_relative = 0; $internal_IMG_link = 0; $height_specified = 0; $width_specified = 0; $no_height_width = 0; $IMG_with_ALT = 0; $img_count = 0; $pdownload_prob = 0; $has_doctype = 0; $has_title = 0; $has_meta_description = 0; $has_meta_keywords = 0; $has_meta_content_sel = 0; $has_meta_robots = 0; $has_meta_lang = 0; $uses_dublin_core = 0; $has_copyright = 0; $has_trademark = 0; $has_secure_des = 0; $has_page_date = 0; $has_mod_date = 0; $has_content_date = 0; $has_nextup_date = 0; $has_expire_date = 0; $has_phone = 0; $has_holiday = 0; $has_origin = 0; $has_lang = 0; $has_navigate = 0; $has_longitude = 0; $has_latitude = 0; $has_cross_street = 0; $no_h1_navigate = 0; $no_h2_navigate = 0; $no_h3_navigate = 0; $no_h4_navigate = 0; $no_h5_navigate = 0; $no_h6_navigate = 0; $no_mailto = 0; $link_fault = 0; $img_access_fault = 0; $invalid_link = 0; $base_tag = 0; $encap = 0; $frame = 0; $noframes = 0; $deprecated_element = 0; $deprecated_attribute = 0; undef(%dep_elem); undef(%dep_attrib); $uses_rel_ref = 0; ### Moved from below to here because the execution order left these ### undefined during the main execution of the script $Empty{"BASE"} = 1; $Empty{"BR"} = 1; $Empty{"HR"} = 1; $Empty{"IMG"} = 1; $Empty{"ISINDEX"} = 1; $Empty{"LINK"} = 1; $Empty{"META"} = 1; $Empty{"NEXTID"} = 1; $Empty{"INPUT"} = 1; # For each element, the names of elements which minimize it. ### See previous comment $Minimize{"DT"} = "DT:DD"; $Minimize{"DD"} = "DT"; $Minimize{"LI"} = "LI"; ###Added a more complete listing of tags that close

###the

tag is "always optional", so perhaps this should be a special case $Minimize{"P"} = "P:DT:DD:LI:H1:H2:H3:H4:H5:H6:BLOCKQUOTE:UL:OL:DL:TABLE:BODY:HTML:ADDRESS:CENTER"; $Minimize{"TR"} = "TR"; ### Corrected $Minimize{"TD"} = "TD:TH:TR"; $Minimize{"TH"} = "TD:TH:TR"; #Initiate parsing of a URL document. &parse_html_stream(); } else { die ("You entered an illegal URL!!! Please correct and reenter.
\n"); } # # Generate the resulting analysis. # print &startHTML("Resulting IEEE Best Practice Analysis"); print ("


\n"); print ("
\n"); print ("Document located at: "); print ("$urldoc"); # print "

Analysis Results

\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; #For header information. # #Document type declaration. if ($headanal{'hi1'}) { print "\n"; print "\n"; print "\n"; } if ($headanal{'hi1'} && !$has_doctype) { print "\n"; } elsif ($headanal{'hi1'} && $has_doctype) { print "\n"; } if ($headanal{'hi1'}) { print "\n"; } # #Title. if ($headanal{'hi2'}) { print "\n"; print "\n"; } if ($headanal{'hi2'} && !$has_title) { print "\n"; } elsif ($headanal{'hi2'} && $has_title) { print "\n"; } if ($headanal{'hi2'}) { print "\n"; } # #Metadata. # if ($headanal{'hi31'} || $headanal{'hi32'} || $headanal{'hi33'} || $headanal{'hi34'}|| $headanal{'hi35'} || $headanal{'hi36'} || $headanal{'hi37'}) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Intellectual property rights (IPR) # if ($bodyanal{'bi1.1'} || $bodyanal{'bi1.2'}) { print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; } # #Security designations # if ($bodyanal{'bi2'} && !($htmlver eq "2.0")) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Dates # if ($bodyanal{'bi3'} && !($htmlver eq "2.0")) { print "\n"; print "\n"; print "\n"; print "\n"; } # #International considerations # if (($bodyanal{'bi41'} || $bodyanal{'bi42'} || $bodyanal{'bi43'} || $bodyanal{'bi44'} || $bodyanal{'bi45'} || $bodyanal{'bi46'} || $bodyanal{'bi47'} || $bodyanal{'bi48'}) && !($htmlver eq "2.0")) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Bandwidth efficiencies # if ($bodyanal{'bi5'} && !($htmlver eq "2.0")) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Navigation aids # if ($bodyanal{'bi6'} && ($htmlver eq "4.0")) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Active links. # if ($bodyanal{'bi7'}) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Absolute and relative links. # if ($bodyanal{'bi8'}) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Encapsulation and frames # if ($bodyanal{'bi10'}) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Graphical images # if ($bodyanal{'bi11'}) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Deprecated HTML elements and attributes # if ($bodyanal{'bi12'} && ($htmlver eq "4.0")) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Physical location information. # if ($bodyanal{'bi13'}) { print "\n"; print "\n"; print "\n"; print "\n"; } # #Server Technology Independence. # if ($bodyanal{'bi14'}) { print "\n"; print "\n"; print "\n"; print "\n"; } print "
CategoryFeatureAnalysis
Header InformationDocument Type Declaration* Not Used.* Used.
Title* Not Used.* Used.
Metadata\n"; } #No use of description attribute. if ($headanal{'hi31'} && !$has_meta_description) { print "

* Description attribute - Not Used.\n"; } elsif ($headanal{'hi31'} && $has_meta_description) { print "

* Description attribute - Used.\n"; } # #No use of keywords attribute. if ($headanal{'hi32'} && !$has_meta_keywords) { print "

* Keywords attribute - Not Used.\n"; } elsif ($headanal{'hi32'} && $has_meta_keywords) { print "

* Keywords attribute - Used.\n"; } # #No use of Dublin Core attributes. if ($headanal{'hi33'} && !$uses_dublin_core) { print "

* Dublin Core attributes - Not Used.\n"; } elsif ($headanal{'hi33'} && $uses_dublin_core) { print "

* Dublin Core attributes - Used.\n"; } # #No use of the content selection attribute. if ($headanal{'hi34'} && !$has_meta_content_sel) { print "

* Content selection attribute - Not Used.\n"; } elsif ($headanal{'hi34'} && $has_meta_content_sel) { print "

* Content selection attribute - Used.\n"; } # #No use of the robot exclusion attribute. if ($headanal{'hi35'} && !$has_meta_robots) { print "

* Robot exclusion attribute - Not Used.\n"; } elsif ($headanal{'hi35'} && $has_meta_robots) { print "

* Robot exclusion attribute - Used.\n"; } # #No use of the lang attribute. if ($headanal{'hi37'} && !$has_meta_lang) { print "

* Human language attribute - Not Used.\n"; } elsif ($headanal{'hi37'} && $has_meta_lang) { print "

* Human language attribute - Used.\n"; } if ($headanal{'hi31'} || $headanal{'hi32'} || $headanal{'hi33'} || $headanal{'hi34'}|| $headanal{'hi35'} || $headanal{'hi36'} || $headanal{'hi37'}) { print "

Body InformationIntellectual property rights\n"; } # #No use of a copyright attribute. if ($bodyanal{'bi1.1'} && !$has_copyright) { print "

* Copyright information - Not Used.\n"; } elsif ($bodyanal{'bi1.1'} && $has_copyright) { print "

* Copyright information - Used.\n"; } # #No use of a trademark attribute. if ($bodyanal{'bi1.2'} && !$has_trademark) { print "

* Trademark information - Not Used.\n"; } elsif ($bodyanal{'bi1.2'} && $has_trademark) { print "

* Trademark information - Used.\n"; } if ($bodyanal{'bi1.1'} || $bodyanal{'bi1.2'}) { print "

Security designations\n"; } # #No use of the security designation value. if ($bodyanal{'bi2'} && !$has_secure_des && !($htmlver eq "2.0")) { print "

* Not Used.\n"; } elsif ($bodyanal{'bi2'} && $has_secure_des && !($htmlver eq "2.0")) { print "

* Used.\n"; } if ($bodyanal{'bi2'} && !($htmlver eq "2.0")) { print "

Dates\n"; $no_date_info = 0; } # #Use of a page date value. if ($bodyanal{'bi3'} && $has_page_date && !($htmlver eq "2.0")) { print "

* Used a most recent change date.\n"; $no_date_info = 1; } # #Use of a modified date value. if ($bodyanal{'bi3'} && $has_mod_date && !($htmlver eq "2.0")) { print "

* Used a modified date.\n"; $no_date_info = 1; } # #Use of a content date value. if ($bodyanal{'bi3'} && $has_content_date && !($htmlver eq "2.0")) { print "

* Used a content date.\n"; $no_date_info = 1; } # #Use of a next update value. if ($bodyanal{'bi3'} && $has_nextup_date && !($htmlver eq "2.0")) { print "

* Used a next update.\n"; $no_date_info = 1; } # #Use of an expiration date value. if ($bodyanal{'bi3'} && $has_expire_date && !($htmlver eq "2.0")) { print "

* Used an expiration date.\n"; $no_date_info = 1; } # #No date information. if ($bodyanal{'bi3'} && !$no_date_info && !($htmlver eq "2.0")) { print "

* Not Used.\n"; $no_date_info = 1; } if ($bodyanal{'bi3'} && !($htmlver eq "2.0")) { print "

International considerations\n"; } # #No use of an international phone number. if ($bodyanal{'bi41'} && !$has_phone && !($htmlver eq "2.0")) { print "

* Phone numbers - Not Used.\n"; } elsif ($bodyanal{'bi41'} && $has_phone && !($htmlver eq "2.0")) { print "

* Phone numbers - Used.\n"; } # #No use of an international holiday. if ($bodyanal{'bi43'} && !$has_holiday && !($htmlver eq "2.0")) { print "

* Holidays - Not Used.\n"; } elsif ($bodyanal{'bi43'} && $has_holiday && !($htmlver eq "2.0")) { print "

* Holidays - Used.\n"; } # #No use of an international place of origin. if ($bodyanal{'bi44'} && !$has_origin && !($htmlver eq "2.0")) { print "

* Place of origin - Not Used.\n"; } elsif ($bodyanal{'bi44'} && $has_origin && !($htmlver eq "2.0")) { print "

* Place of origin - Used.\n"; } # #No use of an international language. if ($bodyanal{'bi45'} && !$has_lang && !($htmlver eq "2.0")) { print "

* Language - Not Used.\n"; } elsif ($bodyanal{'bi45'} && $has_lang && !($htmlver eq "2.0")) { print "

* Language - Used.\n"; } if (($bodyanal{'bi41'} || $bodyanal{'bi42'} || $bodyanal{'bi43'} || $bodyanal{'bi44'} || $bodyanal{'bi45'} || $bodyanal{'bi46'} || $bodyanal{'bi47'} || $bodyanal{'bi48'}) && !($htmlver eq "2.0")) { print "

Bandwidth efficiencies\n"; } # #Potential download problems do to poor links that will generate poor #bandwidth efficiences. if ($bodyanal{'bi5'} && $pdownload_prob && !($htmlver eq "2.0")) { if ($pdownload_prob > 1) { print "

* There are $pdownload_prob links to items larger than 35000 bytes.";} else {print "

* There is $pdownload_prob link to items larger than 35000 bytes.";} } elsif ($bodyanal{'bi5'} && !$pdownload_prob && !($htmlver eq "2.0")) { print "

* No problem with links to large items."; } if ($bodyanal{'bi5'} && !($htmlver eq "2.0")) { print "

Navigation aids\n"; $no_nav_prob = 0; } # #No use of navigational aid supporting a future link on H1. if ($bodyanal{'bi6'} && $no_h1_navigate && ($htmlver eq "4.0")) { print "

* Found $no_h1_navigate H1 tags without ID attribute for future links.

\n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H2. if ($bodyanal{'bi6'} && $no_h2_navigate && ($htmlver eq "4.0")) { print "

* Found $no_h2_navigate H2 tags without ID attribute for future links.

\n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H3. if ($bodyanal{'bi6'} && $no_h3_navigate && ($htmlver eq "4.0")) { print "

* Found $no_h3_navigate H3 tags without ID attribute for future links.

\n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H4. if ($bodyanal{'bi6'} && $no_h4_navigate && ($htmlver eq "4.0")) { print "

* Found $no_h4_navigate H4 tags without ID attribute for future links.

\n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H5. if ($bodyanal{'bi6'} && $no_h5_navigate && ($htmlver eq "4.0")) { print "

* Found $no_h5_navigate H5 tags without ID attribute for future links.

\n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H6. if ($bodyanal{'bi6'} && $no_h6_navigate && ($htmlver eq "4.0")) { print "

* Found $no_h6_navigate H6 tags without ID attribute for future links.

\n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link for author or point of contact. if ($bodyanal{'bi6'} && !$no_mailto) { print "

* Found no mailto link for author or other point of contact.

\n"; $no_nav_prob = 1; } if ($bodyanal{'bi6'} && !$no_nav_prob) { print "

* No navigation problems.

\n"; } if ($bodyanal{'bi6'} && ($htmlver eq "4.0")) { print "
Active links\n"; } # #Dead links. if ($bodyanal{'bi7'} && $invalid_link) { print "

* Found $invalid_link inactive link(s).

\n"; } elsif ($bodyanal{'bi7'} && !$invalid_link) { print "

* No inactive links.

\n"; } if ($bodyanal{'bi7'}) { print "
Absolute and relative links\n"; } # #Absolute and Relative links check. if ($bodyanal{'bi8'} && $absolute_link) { print "

* Found $absolute_link absolute (persistent) link(s).

\n"; } if ($bodyanal{'bi8'} && $relative_link) { if ($base_tag) { print "

* Found $relative_link relative link(s).

\n"; } else { print "

* Found $relative_link relative link(s)without the use of a BASE tag.

\n"; } } if ($bodyanal{'bi8'}) { print "
Encapsulation and frames\n"; $encap_frame = 0; } # #Encapsulation and Frames. if ($bodyanal{'bi10'} && !$encap) { print "

* Found no protection against encapsulation.

\n"; $encap_frame = 1; } if ($bodyanal{'bi10'} && $frame && !$noframes) { print "

* Found frames usage without specified noframes.

\n"; $encap_frame = 1; } elsif ($bodyanal{'bi10'} && $frame && $noframes) { print "

* Found usage of both frames and noframes.

\n"; $encap_frame = 1; } if ($bodyanal{'bi10'} && !$encap_frame) { print "

* Found no problems.

\n"; } if ($bodyanal{'bi10'}) { print "
Graphical images\n"; $graphics = 0; } # #Graphical Images. if ($bodyanal{'bi11'} && $no_height_width) { if ($no_height_width > 1) { print "

* Found $no_height_width image(s) without height and width specifications.

\n"; $graphics = 1; } } $img_without_alt = $img_count - $IMG_with_ALT; if ($bodyanal{'bi11'} && $img_without_alt) { if ($img_without_alt > 1) { print "

* Found $img_without_alt image(s) without an ALT tag.

\n"; $graphics = 1; } } if ($bodyanal{'bi11'} && !$graphics) { print "

* No potential problems.

\n"; } if ($bodyanal{'bi11'}) { print "
Deprecated HTML elements and attributes\n"; $deprecates = 0; } # #Use of deprecated HTML elements and attributes. if ($bodyanal{'bi12'} && ($htmlver eq "4.0") && $deprecated_element) { print "* Found $deprecated_element deprecated element(s).\n"; print "
    \n"; foreach $elem (sort keys(%dep_elem)) { print ("
  • $dep_elem{$elem} <$elem> tag(s)\n"); } print "
\n"; $deprecates = 1; } if ($bodyanal{'bi12'} && ($htmlver eq "4.0") && $deprecated_attribute) { print "

* Found $deprecated_attribute deprecated attribute(s).

\n"; print "\n"; print "\n"; foreach $elem (sort keys(%dep_attrib)) { print "\n"; print "\n"; } print "
Elements\# of deprecated attributes
$elem"; foreach $attrib (sort keys(%{$dep_attrib{$elem}})) { print ("

$dep_attrib{$elem}->{$attrib} $attrib\n"); } print "

\n"; } if ($bodyanal{'bi12'} && ($htmlver eq "4.0")) { print "
Physical location information\n"; $locat_info = 0; } # #Physical location information. if ($bodyanal{'bi13'} && !$has_longitude && !$has_latitude && !$has_cross_street) { print "

* Not Used.

\n"; }elsif ($bodyanal{'bi13'} && (($has_longitude && $has_latitude) || $has_cross_street)) { if ($has_longitude && $has_latitude) { if ($has_cross_street) { print "

* Used.

\n"; } else { print "

* Uses longitude and latitude.

\n"; } } elsif ($has_cross_street) { print "

* Uses cross street.

\n"; } } if ($bodyanal{'bi13'}) { print "
Server Technology Independence\n"; $locat_info = 0; } # #Server Technology Independence. if ($bodyanal{'bi14'} && $uses_rel_ref) { print "

* Not used in $uses_rel_ref case(s).

\n"; } elsif ($bodyanal{'bi14'} && !$uses_rel_ref) { print "

* Maintained.

\n"; } if ($bodyanal{'bi14'}) { print "
\n"; print &endHTML; # # Main subrountine for driving the parsing of the HTML documents. # sub parse_html_stream { local ($token, $new); ## initialization @stack=(); $found_tag = 0; $line_buffer = ""; ## application specific initialization &html_begin_doc(); main: while (1) { # Identify a token if ($line_buffer =~ /^(\s+)/) { $token = $1; $line_buffer = $'; &html_whitespace ($token); } elsif ($line_buffer =~ /^(\)/) { $token = $1; $line_buffer = $'; &html_comment ($token); } elsif ($line_buffer =~ /^(\]*\>)/) { $token = $1; $line_buffer = $'; if (($token =~ /^\]*\>)/) { $token = $1; $line_buffer = $'; &html_etag ($token); } elsif ($line_buffer =~ /^(\<[^!\/][^\>]*\>)/) { $token = $1; $line_buffer = $'; $found_tag++; &html_tag ($token); } elsif ($line_buffer =~ /^([^\s<]+)/) { $token = $1; $line_buffer = $'; $token = &substitute_entities($token); &html_content ($token); } else { # No valid token in buffer. Maybe it's empty, or maybe there's an # incomplete tag. So get some more data. $new = shift(@htmlfile); if (! defined ($new)) {last main;} # if we're trying to find a match for a tag. if ($line_buffer =~ /^\ -1) { print STDERR "Stack not empty at end of document\n"; &print_html_stack(); } } sub html_tag { local ($tag) = @_; local ($element) = &tag_element ($tag); local (%attributes) = &tag_attributes ($tag); # the tag might minimize (be an implicit end) for a previous tag # if not, any non-minimized tags ->go back<- on to the stack local ($curr) = ""; local (@temp_stack) = (); while (@stack) { $curr = pop(@stack); local ($curr_tag) = &tag_element($curr); ### Prevents minimization in the case of nested tables... if (($curr_tag =~ /table/i) && ($element =~ /t(d|h|r)/i)) { unshift(@temp_stack, $curr); last; } elsif (&Minimizes($curr_tag, $element)) { print TEMPERR "$element minimized $curr_tag\n"; &html_end($curr_tag, 0); } else { unshift(@temp_stack, $curr); } } push (@stack, @temp_stack); push (@stack, $tag); &html_begin ($element, $tag, *attributes); if (&Empty($element)) { pop(@stack); &html_end ($element, 0); } } sub html_etag { local ($tag) = @_; local ($element) = &tag_element ($tag); ### Reactivates word count if ($element =~ /^title$/i) { $in_title = 0; } local ($curr) = ""; local (@temp_stack) = (); local ($found) = 0; ### When a non matching tag is found, it is replaced onto the stack while (@stack && !$found) { $curr = pop(@stack); local ($curr_tag) = &tag_element($curr); if (($curr_tag =~ /table/i) && ($element =~ /t(d|h|r)/i)) { &html_end($curr_tag, 0); $found = 1; } elsif ($curr_tag eq $element) { &html_end($curr_tag, 1); $found = 1; } else { unshift(@temp_stack, $curr); } } push (@stack, @temp_stack); if (!$found) { print TEMPERR "No match found for /$element. You will lose\n"; } } # Does element E2 minimize E1? sub Minimizes { local ($e1, $e2) = @_; local ($value) = 0; foreach $elt (split (":", $Minimize{$e1})) { if ($elt eq $e2) {$value = 1;} } $value; } # Empty tags have no content and hence no end tags sub Empty { local ($element) = @_; $Empty{$element}; } sub print_html_stack { print TEMPERR "\n ==\n"; foreach $elt (reverse @stack) {print TEMPERR " $elt\n";} print TEMPERR " ==========\n"; } # The element on top of stack, if any. sub stack_top_element { if ($#stack > -1) { &tag_element ($stack[$#stack]);} } sub stack_pop_element { &tag_element (pop (@stack)); } # The element from the tag, normalized. sub tag_element { local ($tag) = @_; $tag =~ /<\/?([^\s>]+)/; local ($element) = $1; $element =~ tr/a-z/A-Z/; $element; } # associative array of the attributes of a tag. sub tag_attributes { local ($tag) = @_; $tag =~ /^<[a-z]+\s+(.*)>$/i; &parse_attributes($1); } # string should be something like # KEY="value" KEY2="longer value" KEY3="tags o doom" # output is an associative array (like a lisp property list) # attributes names are not case sensitive, do I downcase them # Maybe (probably) I should substitute for entities when parsing attributes. sub parse_attributes { local ($string) = @_; local (%attributes); local ($name, $val); get: while (1) { if ($string =~ /^ *([a-z]+)=\"([^\"]*)\"/i) { $name = $1; $val = $2; $string = $'; $name =~ tr/A-Z/a-z/; $attributes{$name} = $val; } elsif ($string =~ /^ *([a-z]+)=(\S*)/i) { $name = $1; $val = $2; $string = $'; $name =~ tr/A-Z/a-z/; $attributes{$name} = $val;} elsif ($string =~ /^ *(a-z]+)/i) { $name = $1; $val = ""; $string = $'; $name =~ tr/A-Z/a-z/; $attributes{$name} = $val;} else {last;}} %attributes; } sub substitute_entities { local ($string) = @_; $string =~ s/&/&/g; $string =~ s/<//g; $string =~ s/"/\"/g; local($ch); while (/&#([^;]*);/g) { $ch=sprintf("%c", $1); $string =~ s/&#[^;]*;/$ch/; } $string; } # This subroutine is called when a tag begins and records special information # depending on the identity of the tag and the status of certain flags. sub html_begin { local ($element, $tag, *attributes) = @_; local ($routine) = $Begin{$element}; if ($routine eq "") { if (($element eq "!DOCTYPE") && $headanal{'hi1'}) { $has_doctype = 1; } elsif (($element eq "TITLE") && $headanal{'hi2'}) { $has_title = 1; } elsif (($element eq "META") && ($headanal{'hi31'} || $headanal{'hi32'} || $headanal{'hi33'} || $headanal{'hi34'} || $headanal{'hi35'} || $headanal{'hi36'} || $headanal{'hi37'})) { &check_META_tag ($element, $tag, *attribute); } elsif ($element eq "LINK") { &check_LINK_tag ($element, $tag, *attribute); } elsif (($element eq "DIV") && (($htmlver eq "3.2") || ($htmlver eq "4.0"))) { &check_DIV_tag ($element, $tag, *attribute); } elsif (($element eq "SPAN") && (($htmlver eq "3.2") || ($htmlver eq "4.0"))) { &check_SPAN_tag ($element, $tag, *attribute); } elsif ($element eq "A") { &check_A_tag ($element, $tag, *attribute); } elsif ($element eq "AREA") { &check_AREA_tag ($element, $tag, *attribute); } elsif ($element eq "CAPTION") { &check_CAPTION_tag ($element, $tag, *attribute); } elsif ($element eq "IFRAME") { &check_IFRAME_tag ($element, $tag, *attribute); } elsif ($element eq "IMG") { $img_count++; &check_IMG_tag ($element, $tag, *attribute); } elsif ($element eq "INPUT") { &check_INPUT_tag ($element, $tag, *attribute); } elsif ($element eq "OBJECT") { &check_OBJECT_tag ($element, $tag, *attribute); } elsif ($element eq "LEGEND") { &check_LEGEND_tag ($element, $tag, *attribute); } elsif ($element eq "TABLE") { &check_TABLE_tag ($element, $tag, *attribute); } elsif ($element eq "HR") { &check_HR_tag ($element, $tag, *attribute); } elsif ($element eq "DIV") { &check_DIV_tag ($element, $tag, *attribute); } elsif ($element eq "P") { &check_P_tag ($element, $tag, *attribute); } elsif ($element eq "H1") { &check_H1_tag ($element, $tag, *attribute); } elsif ($element eq "H2") { &check_H2_tag ($element, $tag, *attribute); } elsif ($element eq "H3") { &check_H3_tag ($element, $tag, *attribute); } elsif ($element eq "H4") { &check_H4_tag ($element, $tag, *attribute); } elsif ($element eq "H5") { &check_H5_tag ($element, $tag, *attribute); } elsif ($element eq "H6") { &check_H6_tag ($element, $tag, *attribute); } elsif ($element eq "BODY") { &check_BODY_tag ($element, $tag, *attribute); } elsif ($element eq "TR") { &check_TR_tag ($element, $tag, *attribute); } elsif ($element eq "TD") { &check_TD_tag ($element, $tag, *attribute); } elsif ($element eq "TH") { &check_TH_tag ($element, $tag, *attribute); } elsif ($element eq "BR") { &check_BR_tag ($element, $tag, *attribute); } elsif ($element eq "DL") { &check_DL_tag ($element, $tag, *attribute); } elsif ($element eq "OL") { &check_OL_tag ($element, $tag, *attribute); } elsif ($element eq "LI") { &check_LI_tag ($element, $tag, *attribute); } elsif ($element eq "UL") { &check_UL_tag ($element, $tag, *attribute); } elsif ($element eq "SCRIPT") { &check_SCRIPT_tag ($element, $tag, *attribute); } elsif ($element eq "HTML") { &check_HTML_tag ($element, $tag, *attribute); } elsif ($element eq "PRE") { &check_PRE_tag ($element, $tag, *attribute); } elsif ($element eq "BASE") { $base_tag = 1; if ($bodyanal{'bi10'}) { &check_BASE_tag ($element, $tag, *attribute); } } elsif ($element eq "FRAME") { $frame = 1; } elsif ($element eq "NOFRAMES") { $noframes = 1; } # Deprecated HTML elements. # For formatted output use associate array. elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "APPLET")) { $deprecated_element++; $dep_elem{"APPLET"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "BLINK")) { $deprecated_element++; $dep_elem{"BLINK"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "FONT")) { $deprecated_element++; $dep_elem{"FONT"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "MARQUEE")) { $deprecated_element++; $dep_elem{"MARQUEE"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "B")) { $deprecated_element++; $dep_elem{"B"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "I")) { $deprecated_element++; $dep_elem{"I"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "U")) { $deprecated_element++; $dep_elem{"U"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "STRIKE")) { $deprecated_element++; $dep_elem{"STRIKE"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "S")) { $deprecated_element++; $dep_elem{"S"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "BASEFONT")) { $deprecated_element++; $dep_elem{"BASEFONT"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "CENTER")) { $deprecated_element_element++; $dep_elem{"CENTER"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "MENU")) { $deprecated_element++; $dep_elem{"MENU"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "LISTING")) { $deprecated_element++; $dep_elem{"LISTING"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "PLAINTEXT")) { $deprecated_element++; $dep_elem{"PLAINTEXT"}++; } elsif ($bodyanal{'bi12'} && ($htmlver eq "4.0") && ($element eq "XMP")) { $deprecated_element++; $dep_elem{"XMP"}++; } else {$no_tag = 1;} } else {eval "&$routine;"} } # This subroutine is called when a tag ends. sub html_end { local ($element, $explicit) = @_; local ($routine) = $End{$element}; if ($routine eq "") {} else {eval "&$routine(\"$element\", $explicit)";} } sub html_content { local ($word) = @_; } sub html_whitespace { local ($whitespace) = @_; } sub html_comment { local ($tag) = @_; } sub html_begin_doc { } sub html_end_doc { } #Identify attributes of tags. sub check_META_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "name") { if (($val =~ /^description.*/i) && $headanal{'hi31'}) { $has_meta_description = 1; } elsif (($val =~ /^keywords.*/i) && $headanal{'hi32'}) { $has_meta_keywords = 1; } elsif (($val =~ /^robots.*/i) && $headanal{'hi35'}) { $has_meta_robots = 1; } elsif (($val =~ /^title.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^author.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^creator.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^subject.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^publisher.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^contributor.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^date.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^type.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^format.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^identifier.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^source.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^language.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^relation.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^coverage.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^rights.*/i) && ($headanal{'hi33'} || $bodyanal{'bi1.1'})) { $uses_dublin_core = 1; $has_copyright = 1; } } elsif (($_ eq "http-equiv") && $headanal{'hi34'}) { if ($val =~ /^pics\-.*/i) { $has_meta_content_sel = 1; } } elsif (($_ eq "lang") && $headanal{'hi37'}) { $has_meta_lang = 1; $has_lang = 1; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "name") { if (($val =~ /^description.*/i) && $headanal{'hi31'}) { $has_meta_description = 1; } elsif (($val =~ /^keywords.*/i) && $headanal{'hi32'}) { $has_meta_keywords = 1; } elsif (($val =~ /^robots.*/i) && $headanal{'hi35'}) { $has_meta_robots = 1; } elsif (($val =~ /^title.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^author.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^creator.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^subject.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^publisher.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^contributor.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^date.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^type.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^format.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^identifier.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^source.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^language.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^relation.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^coverage.*/i) && $headanal{'hi33'}) { $uses_dublin_core = 1; } elsif (($val =~ /^rights.*/i) && ($headanal{'hi33'} || $bodyanal{'bi1.1'})) { $uses_dublin_core = 1; $has_copyright = 1; } } elsif (($_ eq "http-equiv") && $headanal{'hi34'}) { if ($val =~ /^pics\-.*/i) { $has_meta_content_sel = 1; } } elsif (($_ eq "lang") && $headanal{'hi37'}) { $has_meta_lang = 1; $has_lang = 1; } } else {last;} } } sub check_LINK_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "rights") { $has_copyright = 1; } elsif ($_ eq "trademark") { $has_trademark = 1; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "rights") { $has_copyright = 1; } elsif ($_ eq "trademark") { $has_trademark = 1; } } else {last;} } } sub check_DIV_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "id") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^datemodified.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } } elsif ($_ eq "class") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^datemodified.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } } elsif ($_ eq "lang") { $has_lang = 1; } elsif ($_ eq "align") { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "id") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^modifieddate.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } } elsif ($_ eq "class") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^modifieddate.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } } elsif ($_ eq "lang") { $has_lang = 1; } elsif ($_ eq "align") { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_SPAN_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "id") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^modifieddate.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } } elsif ($_ eq "lang") { $has_lang = 1; } elsif ($_ eq "class") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^modifieddate.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } elsif (($val =~ /^longitude.*/i) && $bodyanal{'bi13'}) { $has_longitude = 1; } elsif (($val =~ /^latitude.*/i) && $bodyanal{'bi13'}) { $has_latitude = 1; } elsif (($val =~ /^cross\_street.*/i) && $bodyanal{'bi13'}) { $has_cross_street = 1; } } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "id") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^modifieddate.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } } elsif ($_ eq "lang") { $has_lang = 1; } elsif ($_ eq "class") { if (($val =~ /^securitydesignation.*/i) && $bodyanal{'bi2'}) { $has_secure_des = 1; } elsif (($val =~ /^pagedate.*/i) && $bodyanal{'bi3'}) { $has_page_date = 1; } elsif (($val =~ /^modifieddate.*/i) && $bodyanal{'bi3'}) { $has_mod_date = 1; } elsif (($val =~ /^contentdate.*/i) && $bodyanal{'bi3'}) { $has_content_date = 1; } elsif (($val =~ /^nextupdate.*/i) && $bodyanal{'bi3'}) { $has_nextup_date = 1; } elsif (($val =~ /^expirationdate.*/i) && $bodyanal{'bi3'}) { $has_expire_date = 1; } elsif (($val =~ /^phone.*/i) && $bodyanal{'bi41'}) { $has_phone = 1; } elsif (($val =~ /^holiday.*/i) && $bodyanal{'bi43'}) { $has_holiday = 1; } elsif (($val =~ /^origin.*/i) && $bodyanal{'bi44'}) { $has_origin = 1; } elsif (($val =~ /^longitude.*/i) && $bodyanal{'bi13'}) { $has_longitude = 1; } elsif (($val =~ /^latitude.*/i) && $bodyanal{'bi13'}) { $has_latitude = 1; } elsif (($val =~ /^cross\_street.*/i) && $bodyanal{'bi13'}) { $has_cross_street = 1; } } } else {last;} } } sub check_A_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[A-Za-z]+\s+(.*)>$/; $link = $1; get: while (1) { if ($link =~ /^\s*([A-Za-z]+)\s*=\s*\"([^\"]*)\"/) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "href") { if ($val =~ /^http\:.*/) { if (!($val =~ /.*html?$/) && !($val =~ /\=/)) { $uses_rel_ref++; } $absolute_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } elsif ($val =~ /^ftp\:.*/) { $absolute_link += 1; } elsif ($val =~ /^mailto\:.*/) { $absolute_link += 1; $no_mailto = 1; } elsif ($val =~ /^\#.*/) { $within_doc_link += 1; } elsif ($val =~ /^\/.*/) { $absolute_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } elsif ($val =~ /^[^\s\/\#].*html?/) { $relative_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } else { $relative_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } } elsif ($_ eq "target") { if ($val =~/^_blank$/) { $new_browser_window++; } } } elsif ($link =~ /^\s*([A-Za-z]+)\s*=\s*(\S*)/) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "href") { if ($val =~ /^http\:.*/) { $absolute_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } elsif ($val =~ /^ftp\:.*/) { $absolute_link += 1; } elsif ($val =~ /^mailto\:.*/) { $absolute_link += 1; } elsif ($val =~ /^\#.*/) { $within_doc_link += 1; } elsif ($val =~ /^\/.*/) { $absolute_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } elsif ($val =~ /^[^\s\/\#].*html?/) { $relative_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } else { $relative_link += 1; if ($bodyanal{'bi7'}) {&check_ACTIVE_link($val);} if (($bodyanal{'bi5'})&& ($htmlver eq "4.0") && (&get_LINK_pagesize($val) > 35000)) { $pdownload_prob++; } } } elsif ($_ eq "target") { if ($val =~/^_blank$/) { $new_browser_window++; } } } else {last;} } } sub check_IMG_tag { local ($element, $tag, *attributes) = @_; local ($link, $val, $alt); $tag =~ /^<[A-Za-z]+\s+(.*)>$/; $link = $1; $alt = 0; get: while (1) { if ($link =~ /^\s*([A-Za-z]+)\s*=\s*\"([^\"]*)\"/) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "src") && $bodyanal{'bi11'}) { if ($val =~ /^http\:.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^ftp\:.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^mailto\:.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^\#.*/) { $internal_IMG_link += 1; } elsif ($val =~ /^\/.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^[^\s\/\#].*html?/) { $extern_IMG_relative += 1; } else { $extern_IMG_relative += 1; } &get_IMAGE_size($val); } elsif (($_ eq "alt") && $bodyanal{'bi11'}) { $IMG_with_ALT += 1; } elsif (($_ eq "height") && $bodyanal{'bi11'}) { $height_specified = 1; } elsif (($_ eq "width") && $bodyanal{'bi11'}) { $width_specified = 1; } elsif (($_ eq "align") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "border") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "hspace") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "vspace") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([A-Za-z]+)\s*=\s*(\S*)/) { $_ = $1; $param1 = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "src") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } else { if ($val =~ /^http\:.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^ftp\:.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^mailto\:.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^\#.*/) { $internal_IMG_link += 1; } elsif ($val =~ /^\/.*/) { $extern_IMG_absolute += 1; } elsif ($val =~ /^[^\s\/\#].*html?/) { $extern_IMG_relative += 1; } else { $extern_IMG_relative += 1; } &get_IMAGE_size($val); } } elsif (($_ eq "alt") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } else { $IMG_with_ALT += 1; } } elsif (($_ eq "ismap lang") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "dir") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "id") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "class") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "md") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "align") && ($bodyanal{'bi11'} || $bodyanal{'bi12'})) { if ($bodyanal{'bi11'} && $bodyanal{'bi12'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif ($bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif ($bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif (($_ eq "height") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } else { $height_specified = 1; } } elsif (($_ eq "width") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } else { $width_specified = 1; } } elsif (($_ eq "units") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "border") && ($bodyanal{'bi11'} || $bodyanal{'bi12'} )) { if ($bodyanal{'bi11'} && $bodyanal{'bi12'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif ($bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif ($bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif (($_ eq "lowsrc") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "hspace") && ($bodyanal{'bi11'} || $bodyanal{'bi12'})) { if ($bodyanal{'bi11'} && $bodyanal{'bi12'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif ($bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif ($bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif (($_ eq "vspace") && ($bodyanal{'bi11'} || $bodyanal{'bi12'})) { if ($bodyanal{'bi11'} && $bodyanal{'bi12'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif ($bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif ($bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif (($_ eq "usemap") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "dynsrc") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "start") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "controls loop") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } elsif (($_ eq "loopdelay") && $bodyanal{'bi11'}) { if ($val =~ /\=/) { $val = &corrected_val($val); $link = $param1 . "=" . $val . $link; } } } else { if (!($height_specified && $width_specified)) { $no_height_width += 1; } $height_specified = 0; $width_specified = 0; last; } } } sub corrected_val { local ($val) = @_; if ($val =~ /[Ss][Rr][Cc]\s*\=/) { $val =~ s/[Ss][Rr][Cc]\s*\=/ src =/; return ($val); } elsif ($val =~ /[Aa][Ll][Tt]\s*\=/) { $val =~ s/[Aa][Ll][Tt]\s*\=/ alt =/; return ($val); } elsif ($val =~ /[Ii][Ss][Mm][Aa][Pp]\s*[Ll][Aa][Nn][Gg]\s*\=/) { $val =~ s/[Ii][Ss][Mm][Aa][Pp]\s*[Ll][Aa][Nn][Gg]\s*\=/ ismap lang =/; return ($val); } elsif ($val =~ /[Dd][Ii][Rr]\s*\=/) { $val =~ s/[Dd][Ii][Rr]\s*\=/ dir =/; return ($val); } elsif ($val =~ /[Ii][Dd]\s*\=/) { $val =~ s/[Ii][Dd]\s*\=/ id =/; return ($val); } elsif ($val =~ /[Cc][Ll][Aa][Ss][Ss]\s*\=/) { $val =~ s/[Cc][Ll][Aa][Ss][Ss]\s*\=/ class =/; return ($val); } elsif ($val =~ /[Mm][Dd]\s*\=/) { $val =~ s/[Mm][Dd]\s*\=/ md =/; return ($val); } elsif ($val =~ /[Aa][Ll][Ii][Gg][Nn]\s*\=/) { $val =~ s/[Aa][Ll][Ii][Gg][Nn]\s*\=/ align =/; return ($val); } elsif ($val =~ /[Hh][Ee][Ii][Gg][Hh][Tt]\s*\=/) { $val =~ s/[Hh][Ee][Ii][Gg][Hh][Tt]\s*\=/ height =/; return ($val); } elsif ($val =~ /[Ww][Ii][Dd][Tt][Hh]\s*\=/) { $val =~ s/[Ww][Ii][Dd][Tt][Hh]\s*\=/ width =/; return ($val); } elsif ($val =~ /[Uu][Nn][Ii][Tt][Ss]\s*\=/) { $val =~ s/[Uu][Nn][Ii][Tt][Ss]\s*\=/ units =/; return ($val); } elsif ($val =~ /[Bb][Oo][Rr][Dd][Ee][Rr]\s*\=/) { $val =~ s/[Bb][Oo][Rr][Dd][Ee][Rr]\s*\=/ border =/; return ($val); } elsif ($val =~ /[Ll][Oo][Ww][Ss][Rr][Cc]\s*\=/) { $val =~ s/[Ll][Oo][Ww][Ss][Rr][Cc]\s*\=/ lowsrc =/; return ($val); } elsif ($val =~ /[Hh][Ss][Pp][Aa][Cc][Ee]\s*\=/) { $val =~ s/[Hh][Ss][Pp][Aa][Cc][Ee]\s*\=/ hspace =/; return ($val); } elsif ($val =~ /[Vv][Ss][Pp][Aa][Cc][Ee]\s*\=/) { $val =~ s/[Vv][Ss][Pp][Aa][Cc][Ee]\s*\=/ vspace =/; return ($val); } elsif ($val =~ /[Uu][Ss][Ee][Mm][Aa][Pp]\s*\=/) { $val =~ s/[Uu][Ss][Ee][Mm][Aa][Pp]\s*\=/ usemap =/; return ($val); } elsif ($val =~ /[Dd][Yy][Nn][Ss][Rr][Cc]\s*\=/) { $val =~ s/[Dd][Yy][Nn][Ss][Rr][Cc]\s*\=/ dynsrc =/; return ($val); } elsif ($val =~ /[Ss][Tt][Aa][Rr][Tt]\s*\=/) { $val =~ s/[Ss][Tt][Aa][Rr][Tt]\s*\=/ start =/; return ($val); } elsif ($val =~ /[Cc][Oo][Nn][Tt][Rr][Oo][Ll][Ss]\s*[Ll][Oo][Oo][Pp]\s*\=/) { $val =~ s/[Cc][Oo][Nn][Tt][Rr][Oo][Ll][Ss]\s*[Ll][Oo][Oo][Pp]\s*\=/ controls loop =/; return ($val); } elsif ($val =~ /[Ll][Oo][Oo][Pp][Dd][Ee][Ll][Aa][Yy]\s*\=/) { $val =~ s/[Ll][Oo][Oo][Pp][Dd][Ee][Ll][Aa][Yy]\s*\=/ loopdelay =/; return ($val); } else { return ($val); } } sub get_IMAGE_size { local ($val) = @_; $val =~ s/^\s+//; if ($val =~ /http:\/\//) { $actual_url = $val; } else { if ($base_url) { $base_url =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*.html?)?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; } else { $urllink =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*.html?)?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; } if ($val =~ /^\/\//) { $actual_url = "http:" . $val; } elsif ($val =~ /^\//) { $actual_url = $m1 . $val; } elsif ($val =~ /^\.\.\//) { &get_RELATIVE_position($val); while ($rel_position) { $val =~ s/^\.\.\///; $m2 =~ /(.*\/)/; $m2 = $1; $m2 =~ s/\/$//; $rel_position -= 1; } if ($m2) { $actual_url = $m1 . "/" . $m2 . "/" . $val; } else { $actual_url = $m1 . "/" . $val; } } else { if (!$m2) { $actual_url = $m1 . "/" . $val; } else { $actual_url = $m1 . "/" . $m2 . "/" . $val; } } } $ua = new LWP::UserAgent; # # #You might want to add proxy lines similar to the following for #intranet usage. # #$ua->proxy(['http'], 'http://proxy.<2n level domain>.<1st level domain>:/'); #$ua->no_proxy('<2n level domain>.<1st level domain>'); # # $url = new URI::URL($actual_url); $request = new HTTP::Request('HEAD', $url); $response = $ua->request($request); #Check the result if ($response->is_success) { $total_IMG_sizes += $response->header('Content-length'); } else { $img_access_fault++; } } sub check_H1_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } if (!$has_navigate) { $no_h1_navigate++; } else { $has_navigate = 0; } } sub check_H2_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } if (!$has_navigate) { $no_h2_navigate++; } else { $has_navigate = 0; } } sub check_H3_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } if (!$has_navigate) { $no_h3_navigate++; } else { $has_navigate = 0; } } sub check_H4_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } if (!$has_navigate) { $no_h4_navigate++; } else { $has_navigate = 0; } } sub check_H5_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } if (!$has_navigate) { $no_h5_navigate++; } else { $has_navigate = 0; } } sub check_H6_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "id") && ($htmlver eq "4.0") && $bodyanal{'bi6'}) { $has_navigate = 1; } elsif (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } if (!$has_navigate) { $no_h6_navigate++; } else { $has_navigate = 0; } } sub check_BASE_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "target") { if ($val =~/^_top$/) { $encap = 1; } } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if ($_ eq "target") { if ($val =~/^_top$/) { $encap = 1; } } } else {last;} } } sub check_AREA_tag { } sub check_CAPTION_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_IFRAME_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_INPUT_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_OBJECT_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "border") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "hspace") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "vspace") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "border") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "hspace") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "vspace") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_LEGEND_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_TABLE_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_HR_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "noshade") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "size") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "noshade") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "size") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_P_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "align") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_BODY_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "alink") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "background") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "link") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "text") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "vlink") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "alink") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "background") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "link") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "text") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "vlink") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_TR_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_TD_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "height") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "nowrap") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "height") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "nowrap") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_TH_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "height") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "nowrap") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "bgcolor") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "height") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "nowrap") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_BR_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "clear") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "clear") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_DL_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "compact") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "compact") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_OL_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "compact") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "type") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "start") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "compact") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "type") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "start") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_LI_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "type") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "value") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "type") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "value") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_UL_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "compact") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "type") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "compact") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } elsif (($_ eq "type") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_SCRIPT_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "language") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "language") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_HTML_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "version") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "version") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_PRE_tag { local ($element, $tag, *attributes) = @_; local ($link, $val); $tag =~ /^<[a-z]+\s+(.*)>$/i; $link = $1; get: while (1) { if ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*\"([^\"]*)\"/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } elsif ($link =~ /^\s*([a-z]+\-?[a-z]+)\s*=\s*(\S*)/i) { $_ = $1; tr/A-Z/a-z/; $val = $2; $link = $'; if (($_ eq "width") && ($htmlver eq "4.0") && $bodyanal{'bi12'}) { $deprecated_attribute++; $dep_attrib{$element}->{$_}++; } } else {last;} } } sub check_ACTIVE_link { local ($val) = @_; $val =~ s/^\s+//; if ($val =~ /http:\/\//) { $actual_url = $val; } else { if ($base_url) { $base_url =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*(?:.html?))?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; } else { $urldoc =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*(?:.html?))?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; } if ($val =~ /^\/\//) { $actual_url = "http:" . $val; } elsif ($val =~ /^\//) { if ($m1) { $actual_url = $m1 . $val; } else { if ($urldoc =~ /.*\/$/) { $url = $urldoc; $url =~ s/\/$//; $actual_url = $url . $val; } else { $actual_url = $urldoc . $val; } } } elsif ($val =~ /^\.\.\//) { &get_RELATIVE_position($val); while ($rel_position) { $val =~ s/^\.\.\///; $m2 =~ /(.*\/)/; $m2 = $1; $m2 =~ s/\/$//; $rel_position -= 1; } if ($m2) { $actual_url = $m1 . "/" . $m2 . "/" . $val; } else { $actual_url = $m1 . "/" . $val; } } else { if (!$m1) { if ($urldoc =~ /.*\/$/) { $actual_url = $urldoc . $val; } else { $urldoc =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*(?:.html?))?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; if ($m3 =~ /.*html?$/) { $actual_url = $m1 . "/" . $m2 . "/" . $val; } else { $actual_url = $urldoc . "/" . $val; } } } elsif (!$m2) { $actual_url = $m1 . "/" . $val; } else { $actual_url = $m1 . "/" . $m2 . "/" . $val; } } } $ua = new LWP::UserAgent; # # #You might want to add proxy lines similar to the following for #intranet usage. # #$ua->proxy(['http'], 'http://proxy.<2n level domain>.<1st level domain>:/'); #$ua->no_proxy('<2n level domain>.<1st level domain>'); # # $url = new URI::URL($actual_url); $request = new HTTP::Request('HEAD', $url); $response = $ua->request($request); #Check the result if (!$response->is_success && ($response->code == 404)) { $invalid_link++; } } sub get_LINK_pagesize { local ($val) = @_; $val =~ s/^\s+//; if ($val =~ /http:\/\//) { $actual_url = $val; } else { if ($base_url) { $base_url =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*(?:.html?))?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; } else { $urldoc =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*(?:.html?))?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; } if ($val =~ /^\/\//) { $actual_url = "http:" . $val; } elsif ($val =~ /^\//) { if ($m1) { $actual_url = $m1 . $val; } else { if ($urldoc =~ /.*\/$/) { $url = $urldoc; $url =~ s/\/$//; $actual_url = $url . $val; } else { $actual_url = $urldoc . $val; } } } elsif ($val =~ /^\.\.\//) { &get_RELATIVE_position($val); while ($rel_position) { $val =~ s/^\.\.\///; $m2 =~ /(.*\/)/; $m2 = $1; $m2 =~ s/\/$//; $rel_position -= 1; } if ($m2) { $actual_url = $m1 . "/" . $m2 . "/" . $val; } else { $actual_url = $m1 . "/" . $val; } } else { if (!$m1) { if ($urldoc =~ /.*\/$/) { $actual_url = $urldoc . $val; } else { $urldoc =~ m|^((?:http:)?//[^/]*)(?:/?(.*?)/?([^/]*(?:.html?))?)?$|x; ($m1, $m2, $m3) = ("","", ""); $m1 = $1; $m2 = $2; $m3 = $3; if ($m3 =~ /.*html?$/) { $actual_url = $m1 . "/" . $m2 . "/" . $val; } else { $actual_url = $urldoc . "/" . $val; } } } elsif (!$m2) { $actual_url = $m1 . "/" . $val; } else { $actual_url = $m1 . "/" . $m2 . "/" . $val; } } } $ua = new LWP::UserAgent; # # #You might want to add proxy lines similar to the following for #intranet usage. # #$ua->proxy(['http'], 'http://proxy.<2n level domain>.<1st level domain>:/'); #$ua->no_proxy('<2n level domain>.<1st level domain>'); # # $url = new URI::URL($actual_url); $request = new HTTP::Request('HEAD', $url); $response = $ua->request($request); #Check the result if ($response->is_success) { $total_IMG_sizes += $response->header('Content-length'); } else { $link_fault++; } } sub get_RELATIVE_position { local ($val) = @_; $rel_position = 0; while ($val =~ /^\.\.\//) { $rel_position += 1; $val =~ s/^\.\.\///; } } sub startHTML { local ($title) = @_; local ($value); $value = < $title DONE } sub endHTML { local ($value); $value = <

Version 2.2
Page last modified: 15 May 2002
DONE }