#!/usr/bin/perl -w00 #++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ # # Copyright (c) 1997-2002 James Blustein # # TITLE: insert-link-track-calls.pl (insert-calls.pl for short) # AUTHOR: J. Blustein (see below for credit to # basic source) # CREATED: 17 Nov 1997 # LAST MODIFIED: 15 Sep 2002 (v3.0.2a) # # PURPOSE: Replaces HTML HREF anchors (anchors with outgoing links) with # calls to the JavaScript function used with the link-tracker.pl # script to record which links readers follow. # # NOTES: The HTML source code must be valid (according to the HTML 3.2 # standard or below) and abide by the following additional three # restrictions: # (a) no comments inside of anchor elements # (b) no occurance of either ` # on 17 November 1997 # #-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- require 5.004; use CGI qw(escape); use strict; use File::Basename; use constant VERSION => 'insert-calls.pl 3.0.2a (02 Jun 1999)'; use constant DEBUG_OUTPUT => 0; # 0 is off use constant VERBOSE => 1; use constant CALL_CGI_DIRECTLY => 0; # call JavaScript iff 0, # call CGI otherwise use constant JSPROG => "go"; # go(HREF, NAME, TITLE) use constant CGIPROG => "/cgi-bin/tracker"; use constant DOC => "1"; use constant CGI_SEP => "&"; my($PROG) = basename($0); BEGIN { print STDERR "This is `" . VERSION . "'\n"; } #++ # Constants # CGIPROG: The CGI program to call when a link is followed. # The NAME, HREF, and TITLE of the referring anchor will be # passed to the CGI program # # DOC: Something that can be resolved to the address (URL) of the # document into which the CGI calls are being inserted. This # is needed to resolve all the HREFs. You could use the entire # URL but that would take much longer to transmit than a short # identification number. # # CGI_SEP: Character that separates arguments to CGI program. # According to the specification a semicolon or an ampersnad # can be used (with a preference for a semicolon). I have to # use ampersand because the CGI.pm module doesn't work with # semicolons. #-- #++ # Global variables #-- my(%unique_name); # these values are used below to create a name for links # where no name is provided in the anchor #-- About %pattern and %tag -- # For my experiment I made several types of hypertext links (all of which # were coded in HTML). The NAME of the anchor included a label to identify # the link type. my %pattern; # identifying regexps for the major types of links my %tag; # if a link has no name then it will be assigned one # composed of its $tag and a unique number $tag{sem} = "S"; # S for semantic $tag{defn} = "D"; # D for definition $tag{struct} = "P"; # P for plain $tag{other} = "L"; # L for link $unique_name{sem} = 1; $unique_name{defn} = 1; $unique_name{struct} = 1; $unique_name{other} = 1; $pattern{sem} = '(?:' . join ('|', qw{ sent2sect s2sect sent2sent s2sent summary summ sent2para s2p s2para wg2sent } ) . ')'; $pattern{defn} = '(?:' . join ('|', qw{ defn:sys defn:term defn:abbr defn } ) . ')'; $pattern{struct} = '(?:' . join ('|', qw{ fn:back-jump list:LoF list:LoT list:ToC list:ToS list:jump list:localC struct struct:BottomUp struct:Footnote struct:LoF struct:LoT struct:Ref struct:Up2ToC struct:manual } ) . ')'; # METHOD: # If there is an anchor in this paragraph then grab the text before and # after the anchor and the parts of the anchor itself (for processing in # the while loop). Otherwise just print out the input line exactly as it # appears. while (<>) { if ( m{) # the rest of the tag # (.*?) # text after the tag up to (?= #-- copying the patterns matched into declared variables # Would split() do this better? # Would pointers help here? # This works and that's good enough for me now and it can be # improved in a future version $field[ 1] = $1; $field[ 2] = $2; $field[ 3] = $3; $field[ 4] = $4; $field[ 5] = $5; $field[ 6] = $6; $field[ 7] = $7; $field[ 8] = $8; $field[ 9] = $9; $field[10] = $10; $field[11] = $11; $field[12] = $12; $before=$field[ 1] if defined $field[ 1]; $rest =$field[11] if defined $field[11]; $after =$field[12] if defined $field[12]; #-- filling in the values of $href, $name, and $title #-- from the patterns matched above $href=""; $href=$field[ 4] if $field[ 2] =~ m/HREF/i; $href=$field[ 7] if defined $field[ 5] && $field[ 5] =~ m/HREF/i; $href=$field[10] if defined $field[ 8] && $field[ 8] =~ m/HREF/i; $name=""; $name=$field[ 4] if $field[ 2] =~ m/NAME/i; $name=$field[ 7] if defined $field[ 5] && $field[ 5] =~ m/NAME/i; $name=$field[10] if defined $field[ 8] && $field[ 8] =~ m/NAME/i; $title=""; $title=$field[ 4] if $field[ 2] =~ m/TITLE/i; $title=$field[ 7] if defined $field[ 5] && $field[ 5] =~ m/TITLE/i; $title=$field[10] if defined $field[ 8] && $field[ 8] =~ m/TITLE/i; # if there is an HREF then replace it with our CGI call # else print out the anchor unchanged (except possibly reordered) my($elements)=""; $elements .= "TITLE=\"$title\" " if "" ne $title; $elements .= "NAME=\"$name\" " if "" ne $name; if ("" eq $href) { $elements .= "$rest" if "" ne $rest; print "$before