#!/usr/bin/perl
#
#
#Title : WebSAT_P2001
#File : websat_p2001_single.pl
#Author : Charles L. Sheppard
#
#
#WebSAT_P2001 is part of the NIST Web Metrics Testbed developed in the Visualization
#and Virtual Reality Group (VVRG) of the Information Technology Laboratory
#(ITL) at the National Institute of Standards and Technology (NIST). NIST is
#an agency of the US Government, and as such is not subject to copyright.
#The underlying HTML parsing engine for websat.pl is code developed by Jim
#Davis at Xerox Parc (jdavis@parc.xerox.com).
#
#Include necessary parameters to handle WEB input.
require ("cgi-lib.pl");
use LWP;
use URI::URL;
#Parse the submitted form: ReadParse fills %in with one key/value pair
#per form field.
&ReadParse;
#Emit the MIME response header ahead of any page content.
print "Content-type:text/html\n\n";
###Accept user input.###
#Pick up the requested HTML version and the URL of the document to
#analyze in a single hash slice.
($htmlver, $urldoc) = @in{'html_version', 'urldoc'};
#Determine settings for categories.
#Head information: all ten 'hi*' switches follow the single 'headinfo'
#form field -- each is 1 when that field is true and 0 otherwise.
{
    my $enabled = $in{'headinfo'} ? 1 : 0;
    @headanal{qw(hi1 hi2 hi31 hi32 hi33 hi34 hi35 hi36 hi37 hi4)}
        = ($enabled) x 10;
}
#Body information: all 22 'bi*' switches follow the single 'bodyinfo'
#form field -- each is 1 when that field is true and 0 otherwise.
{
    my $enabled = $in{'bodyinfo'} ? 1 : 0;
    my @body_checks = qw(
        bi1.1 bi1.2 bi2 bi3
        bi41 bi42 bi43 bi44 bi45 bi46 bi47 bi48
        bi5 bi6 bi7 bi8 bi9 bi10 bi11 bi12 bi13 bi14
    );
    $bodyanal{$_} = $enabled for @body_checks;
}
#Remove whitespace from the submitted URL and drop a bare leading "//".
#The validation that follows accepts "//host" as a spelling of the
#optional "http://" prefix, but "http://" is then prepended in front of
#the still-present slashes, producing "http:////host". Removing the "//"
#here lets such input end up as "http://host". Only a "//" that is
#followed by a non-slash character is removed, so runs of three or more
#slashes are left as they are. An undefined value is returned untouched.
sub _strip_url_noise {
    my ($raw) = @_;
    return $raw unless defined $raw;
    $raw =~ s/\s//g;
    $raw =~ s{^//(?=[^/])}{};
    return $raw;
}
$urldoc = _strip_url_noise($urldoc);
if ($urldoc =~ m|^(?:(?:http:)?//)?[-a-zA-Z_.0-9]+(?::[0-9]+)?(?:/[-=~:a-zA-Z\$_@!%^&*().0-9+?]*)*$|) {
#Ensure that the optional "http://" prefix is present in the end ###
$urldoc = "http://" . $urldoc unless ($urldoc =~ /^http:\/\//);
#Define parameters needed to capture URL document.
###Instantiate a UserAgent
$ua = new LWP::UserAgent;
#You might want to add proxy lines similar to the following for
#intranet usage.
#
#$ua->proxy(['http'], 'http://proxy.<2n level domain>.<1st level domain>:
###the
\n");
}
#
# Generate the resulting analysis.
#
print &startHTML("Resulting IEEE Best Practice Analysis");
print ("
Category | \n"; print "Feature | \n"; print "Analysis | \n"; print "|||
---|---|---|---|---|---|
Header Information | \n"; print "Document Type Declaration | \n"; } if ($headanal{'hi1'} && !$has_doctype) { print "* Not Used. | \n"; } elsif ($headanal{'hi1'} && $has_doctype) { print "* Used. | \n"; } if ($headanal{'hi1'}) { print "||
Title | \n"; } if ($headanal{'hi2'} && !$has_title) { print "* Not Used. | \n"; } elsif ($headanal{'hi2'} && $has_title) { print "* Used. | \n"; } if ($headanal{'hi2'}) { print "|||
Metadata | \n"; print "\n";
}
#No use of description attribute.
if ($headanal{'hi31'} && !$has_meta_description) {
print " * Description attribute - Not Used.\n"; } elsif ($headanal{'hi31'} && $has_meta_description) { print " * Description attribute - Used.\n"; } # #No use of keywords attribute. if ($headanal{'hi32'} && !$has_meta_keywords) { print " * Keywords attribute - Not Used.\n"; } elsif ($headanal{'hi32'} && $has_meta_keywords) { print " * Keywords attribute - Used.\n"; } # #No use of Dublin Core attributes. if ($headanal{'hi33'} && !$uses_dublin_core) { print " * Dublin Core attributes - Not Used.\n"; } elsif ($headanal{'hi33'} && $uses_dublin_core) { print " * Dublin Core attributes - Used.\n"; } # #No use of the content selection attribute. if ($headanal{'hi34'} && !$has_meta_content_sel) { print " * Content selection attribute - Not Used.\n"; } elsif ($headanal{'hi34'} && $has_meta_content_sel) { print " * Content selection attribute - Used.\n"; } # #No use of the robot exclusion attribute. if ($headanal{'hi35'} && !$has_meta_robots) { print " * Robot exclusion attribute - Not Used.\n"; } elsif ($headanal{'hi35'} && $has_meta_robots) { print " * Robot exclusion attribute - Used.\n"; } # #No use of the lang attribute. if ($headanal{'hi37'} && !$has_meta_lang) { print " * Human language attribute - Not Used.\n"; } elsif ($headanal{'hi37'} && $has_meta_lang) { print " * Human language attribute - Used.\n"; } if ($headanal{'hi31'} || $headanal{'hi32'} || $headanal{'hi33'} || $headanal{'hi34'}|| $headanal{'hi35'} || $headanal{'hi36'} || $headanal{'hi37'}) { print " | \n";
print "||||
Body Information | \n"; print "Intellectual property rights | \n"; print "\n";
}
#
#No use of a copyright attribute.
if ($bodyanal{'bi1.1'} && !$has_copyright) {
print " * Copyright information - Not Used.\n"; } elsif ($bodyanal{'bi1.1'} && $has_copyright) { print " * Copyright information - Used.\n"; } # #No use of a trademark attribute. if ($bodyanal{'bi1.2'} && !$has_trademark) { print " * Trademark information - Not Used.\n"; } elsif ($bodyanal{'bi1.2'} && $has_trademark) { print " * Trademark information - Used.\n"; } if ($bodyanal{'bi1.1'} || $bodyanal{'bi1.2'}) { print " | \n";
print "|||
Security designations | \n"; print "\n";
}
#
#No use of the security designation value.
if ($bodyanal{'bi2'} && !$has_secure_des && !($htmlver eq "2.0")) {
print " * Not Used.\n"; } elsif ($bodyanal{'bi2'} && $has_secure_des && !($htmlver eq "2.0")) { print " * Used.\n"; } if ($bodyanal{'bi2'} && !($htmlver eq "2.0")) { print " | \n";
print "||||
Dates | \n"; print "\n";
$no_date_info = 0;
}
#
#Use of a page date value.
if ($bodyanal{'bi3'} && $has_page_date && !($htmlver eq "2.0")) {
print " * Used a most recent change date.\n"; $no_date_info = 1; } # #Use of a modified date value. if ($bodyanal{'bi3'} && $has_mod_date && !($htmlver eq "2.0")) { print " * Used a modified date.\n"; $no_date_info = 1; } # #Use of a content date value. if ($bodyanal{'bi3'} && $has_content_date && !($htmlver eq "2.0")) { print " * Used a content date.\n"; $no_date_info = 1; } # #Use of a next update value. if ($bodyanal{'bi3'} && $has_nextup_date && !($htmlver eq "2.0")) { print " * Used a next update.\n"; $no_date_info = 1; } # #Use of an expiration date value. if ($bodyanal{'bi3'} && $has_expire_date && !($htmlver eq "2.0")) { print " * Used an expiration date.\n"; $no_date_info = 1; } # #No date information. if ($bodyanal{'bi3'} && !$no_date_info && !($htmlver eq "2.0")) { print " * Not Used.\n"; $no_date_info = 1; } if ($bodyanal{'bi3'} && !($htmlver eq "2.0")) { print " | \n";
print "||||
International considerations | \n"; print "\n";
}
#
#No use of an international phone number.
if ($bodyanal{'bi41'} && !$has_phone && !($htmlver eq "2.0")) {
print " * Phone numbers - Not Used.\n"; } elsif ($bodyanal{'bi41'} && $has_phone && !($htmlver eq "2.0")) { print " * Phone numbers - Used.\n"; } # #No use of an international holiday. if ($bodyanal{'bi43'} && !$has_holiday && !($htmlver eq "2.0")) { print " * Holidays - Not Used.\n"; } elsif ($bodyanal{'bi43'} && $has_holiday && !($htmlver eq "2.0")) { print " * Holidays - Used.\n"; } # #No use of an international place of origin. if ($bodyanal{'bi44'} && !$has_origin && !($htmlver eq "2.0")) { print " * Place of origin - Not Used.\n"; } elsif ($bodyanal{'bi44'} && $has_origin && !($htmlver eq "2.0")) { print " * Place of origin - Used.\n"; } # #No use of an international language. if ($bodyanal{'bi45'} && !$has_lang && !($htmlver eq "2.0")) { print " * Language - Not Used.\n"; } elsif ($bodyanal{'bi45'} && $has_lang && !($htmlver eq "2.0")) { print " * Language - Used.\n"; } if (($bodyanal{'bi41'} || $bodyanal{'bi42'} || $bodyanal{'bi43'} || $bodyanal{'bi44'} || $bodyanal{'bi45'} || $bodyanal{'bi46'} || $bodyanal{'bi47'} || $bodyanal{'bi48'}) && !($htmlver eq "2.0")) { print " | \n";
print "||||
Bandwidth efficiencies | \n"; print "\n";
}
#
#Potential download problems due to poor links that will generate poor
#bandwidth efficiencies.
if ($bodyanal{'bi5'} && $pdownload_prob && !($htmlver eq "2.0")) {
if ($pdownload_prob > 1) {
print " * There are $pdownload_prob links to items larger than 35000 bytes.";} else {print " * There is $pdownload_prob link to items larger than 35000 bytes.";} } elsif ($bodyanal{'bi5'} && !$pdownload_prob && !($htmlver eq "2.0")) { print " * No problem with links to large items."; } if ($bodyanal{'bi5'} && !($htmlver eq "2.0")) { print " | \n";
print "||||
Navigation aids | \n"; print "\n";
$no_nav_prob = 0;
}
#
#No use of navigational aid supporting a future link on H1.
if ($bodyanal{'bi6'} && $no_h1_navigate && ($htmlver eq "4.0")) {
print " * Found $no_h1_navigate H1 tags without ID attribute for future links. \n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H2. if ($bodyanal{'bi6'} && $no_h2_navigate && ($htmlver eq "4.0")) { print "* Found $no_h2_navigate H2 tags without ID attribute for future links. \n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H3. if ($bodyanal{'bi6'} && $no_h3_navigate && ($htmlver eq "4.0")) { print "* Found $no_h3_navigate H3 tags without ID attribute for future links. \n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H4. if ($bodyanal{'bi6'} && $no_h4_navigate && ($htmlver eq "4.0")) { print "* Found $no_h4_navigate H4 tags without ID attribute for future links. \n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H5. if ($bodyanal{'bi6'} && $no_h5_navigate && ($htmlver eq "4.0")) { print "* Found $no_h5_navigate H5 tags without ID attribute for future links. \n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link on H6. if ($bodyanal{'bi6'} && $no_h6_navigate && ($htmlver eq "4.0")) { print "* Found $no_h6_navigate H6 tags without ID attribute for future links. \n"; $no_nav_prob = 1; } # #No use of navigational aid supporting a future link for author or point of contact. if ($bodyanal{'bi6'} && !$no_mailto) { print "* Found no mailto link for author or other point of contact. \n"; $no_nav_prob = 1; } if ($bodyanal{'bi6'} && !$no_nav_prob) { print "* No navigation problems. \n"; } if ($bodyanal{'bi6'} && ($htmlver eq "4.0")) { print " | \n";
print "||||
Active links | \n"; print "\n";
}
#
#Dead links.
if ($bodyanal{'bi7'} && $invalid_link) {
print " * Found $invalid_link inactive link(s). \n"; } elsif ($bodyanal{'bi7'} && !$invalid_link) { print "* No inactive links. \n"; } if ($bodyanal{'bi7'}) { print " | \n";
print "||||
Absolute and relative links | \n"; print "\n";
}
#
#Absolute and Relative links check.
if ($bodyanal{'bi8'} && $absolute_link) {
print " * Found $absolute_link absolute (persistent) link(s). \n"; } if ($bodyanal{'bi8'} && $relative_link) { if ($base_tag) { print "* Found $relative_link relative link(s). \n"; } else { print "* Found $relative_link relative link(s)without the use of a BASE tag. \n"; } } if ($bodyanal{'bi8'}) { print " | \n";
print "||||
Encapsulation and frames | \n"; print "\n";
$encap_frame = 0;
}
#
#Encapsulation and Frames.
if ($bodyanal{'bi10'} && !$encap) {
print " * Found no protection against encapsulation. \n"; $encap_frame = 1; } if ($bodyanal{'bi10'} && $frame && !$noframes) { print "* Found frames usage without specified noframes. \n"; $encap_frame = 1; } elsif ($bodyanal{'bi10'} && $frame && $noframes) { print "* Found usage of both frames and noframes. \n"; $encap_frame = 1; } if ($bodyanal{'bi10'} && !$encap_frame) { print "* Found no problems. \n"; } if ($bodyanal{'bi10'}) { print " | \n";
print "||||
Graphical images | \n"; print "\n";
$graphics = 0;
}
#
#Graphical Images.
if ($bodyanal{'bi11'} && $no_height_width) {
if ($no_height_width > 1) {
print " * Found $no_height_width image(s) without height and width specifications. \n"; $graphics = 1; } } $img_without_alt = $img_count - $IMG_with_ALT; if ($bodyanal{'bi11'} && $img_without_alt) { if ($img_without_alt > 1) { print "* Found $img_without_alt image(s) without an ALT tag. \n"; $graphics = 1; } } if ($bodyanal{'bi11'} && !$graphics) { print "* No potential problems. \n"; } if ($bodyanal{'bi11'}) { print " | \n";
print "||||
Deprecated HTML elements and attributes | \n"; print "\n";
$deprecates = 0;
}
#
#Use of deprecated HTML elements and attributes.
if ($bodyanal{'bi12'} && ($htmlver eq "4.0") && $deprecated_element) {
print "* Found $deprecated_element deprecated element(s).\n";
print "
* Found $deprecated_attribute deprecated attribute(s). \n"; print "
| \n";
print "||||
Physical location information | \n"; print "\n";
$locat_info = 0;
}
#
#Physical location information.
if ($bodyanal{'bi13'} && !$has_longitude && !$has_latitude && !$has_cross_street) {
print " * Not Used. \n"; }elsif ($bodyanal{'bi13'} && (($has_longitude && $has_latitude) || $has_cross_street)) { if ($has_longitude && $has_latitude) { if ($has_cross_street) { print "* Used. \n"; } else { print "* Uses longitude and latitude. \n"; } } elsif ($has_cross_street) { print "* Uses cross street. \n"; } } if ($bodyanal{'bi13'}) { print " | \n";
print "||||
Server Technology Independence | \n"; print "\n";
$locat_info = 0;
}
#
#Server Technology Independence.
if ($bodyanal{'bi14'} && $uses_rel_ref) {
print " * Not used in $uses_rel_ref case(s). \n"; } elsif ($bodyanal{'bi14'} && !$uses_rel_ref) { print "* Maintained. \n"; } if ($bodyanal{'bi14'}) { print " | \n";
print "
Version 2.2 Page last modified: 15 May 2002 |