#!/usr/bin/perl -s
# $Id: vdelatex 376 2012-10-29 12:44:54Z vlado $
# Copyright 2001-2012 Vlado Keselj web.cs.dal.ca/~vlado
#
# vdelatex: strips LaTeX/TeX markup from the input files, writes the
# remaining text to 'tmp.vdelatex', and pipes that file through
# spell(1) | sort -u (and comm(1) against the allowed-words list, if any).
#
# The -s switch on the shebang line makes perl turn command-line switches
# (-v, -h, -AllowedWordsFile=file) into the package variables $v, $h and
# $AllowedWordsFile, which is why they are declared with 'use vars'.

use strict;
use vars qw($VERSION);
$VERSION = sprintf "1.%d", q$Revision: 376 $ =~ /(\d+)/g;

use vars qw($AllowedWordsFile $tmpFile $TmpAFile $inBraces $inBrackets
            @NonSpellCommands @IgnoreRegs $VerbatimFlag $v $h );

if ($v) { print "$VERSION\n"; exit; }
if ($h) { help(); exit; }

# Print the usage message (interpolating $VERSION) to standard output.
sub help {
    print <<"#EOT";
# vdelatex Version $VERSION
# Perl script for spell-checking LaTeX and TeX files.
# uses spell, sort, and comm
#
# By default, the file 'allowedWords' is used as a list of additional
# allowed words.
#
# Usage: vdelatex [switches] [files]
#  -h    Print help and exit.
#  -AllowedWordsFile=file use file 'file' for additional allowed words
#        instead of 'allowedWords'
#  -v    Print version of the program and exit.
#
# Examples:
# ---------
# vdelatex text.tex ...
# vdelatex -AllowedWordsFile=file text.latex t1.tex
#
# Command examples within the file:
# ---------------------------------
# %!vdelatex:allow:some_word_or_regex
# %!vdelatex:allow:list,of,words
# %!vdelatex:define non-spell command:\\att
# %!vdelatex:ignore the rest
# %!vdelatex:ignore this line
# %!vdelatex:ignore this word
# %!vdelatex:ignore begin
# %!vdelatex:ignore end
#EOT
}

# Fall back to ./allowedWords when no file was given and that file exists;
# an explicitly requested file that does not exist is a fatal error.
if ( !defined($AllowedWordsFile) and -f 'allowedWords' ) {
    $AllowedWordsFile = 'allowedWords';
}
elsif ( defined($AllowedWordsFile) and !-f $AllowedWordsFile ) {
    die "no file: $AllowedWordsFile";
}

$tmpFile  = 'tmp.vdelatex';
$TmpAFile = "/tmp/vdelatex.$$.TmpAFile";
open(I, '>', $tmpFile) or die "vdelatex:cannot open $tmpFile for writing:";

# Regex fragments matching one {...} or [...] group (no nesting).
$inBraces   = "\\{[^\\}]*\\}";
$inBrackets = "\\[[^\\]]*\\]";

# Commands whose single braced argument is removed together with the command.
@NonSpellCommands = qw(bibitem bibliography label pageref ref);

# Regexes whose matches are replaced by a space before spell-checking.
@IgnoreRegs = ( '\\b(?:http|ftp):\\/\\/\S+' );

# addIgnore($word): quote $word as a literal regex, anchor it with \b on
# whichever ends start/finish with a word character, and append the result
# to @IgnoreRegs.
sub addIgnore {
    my $w = shift;
    $w = "\Q$w";
    $w =~ s/^(\w)/\\b$1/;
    $w =~ s/(\w)$/$1\\b/;
    push @IgnoreRegs, $w;
}

if ( defined($AllowedWordsFile) ) {
    open(A, '<', $AllowedWordsFile)
        or die "vdelatex:cannot open $AllowedWordsFile for reading: $!";
    open(T, '|-', "sort -u > $TmpAFile")
        or die "vdelatex:cannot open $TmpAFile for writing:";
    # Read the allowed-words file line by line.  (The loop must read from A;
    # an empty while() condition is always true and would never terminate.)
    while (<A>) {
        chomp;
        if (/[-{}\\@.]/) {
            # Entries containing punctuation are handled as ignore regexes.
            addIgnore($_);
        }
        else {
            # For an all-lowercase entry also allow its capitalised form.
            if (/^([a-z])([a-z -]*)$/) { print T "\U$1\L$2\n" }
            print T "$_\n";
        }
    }
    close(A);
    close(T);    # closing the pipe waits for sort to finish writing $TmpAFile
}

# to handle something like this in regexes:
# cs.dal.ca
# vlado@cs.dal.ca
# (longest patterns are tried first)
@IgnoreRegs = sort { length($b) <=> length($a) } @IgnoreRegs;

$VerbatimFlag = '';

MAINLOOP:
while (<>) {
    # A line ending inside an unclosed '{' is joined with the following line.
    # Guard with eof(): calling <> again after it has returned end-of-file
    # would make perl start a new ARGV pass and read from STDIN.
    if (/\{[^}]*$/) { $_ .= <> unless eof() }

    my $vdelatexflag = ( index($_, "%!vdelatex:") > -1 );

    if ($vdelatexflag) {
        if (/^%!vdelatex:ignore the rest\s*$/) {
            while (<>) { }    # drain the remaining input
            $_ = '';
            last MAINLOOP;
        }

        if (/^%!vdelatex:define non-spell command:\\(\w+)/) {
            push @NonSpellCommands, $1;
            $_ = "\n";
        }
        elsif (/^%!vdelatex:allow:/) {
            my $w = $';
            chomp $w;
            if ( index($w, ',') > -1 ) {
                foreach my $word ( split(/,/, $w) ) { addIgnore($word) }
            }
            else {
                addIgnore($w);
            }
            $_ = "\n";
        }

        if (/%!vdelatex:ignore begin\b/) {
            # Keep the text before the marker, then skip (possibly nested)
            # begin/end pairs, reading further lines when necessary.
            my $keep         = "$`\n";
            my $nestingCount = 1;
            $_ = $';
            while (/%!vdelatex:ignore (begin|end)\b/) {
                if ($1 eq 'begin') { ++$nestingCount } else { --$nestingCount }
                $_ = $';
                last if $nestingCount == 0;
            }
            if ($nestingCount > 0) {
                while (<>) {
                    while (/%!vdelatex:ignore (begin|end)\b/) {
                        if ($1 eq 'begin') { ++$nestingCount }
                        else               { --$nestingCount }
                        $_ = $';
                        last if $nestingCount == 0;
                    }
                    last if $nestingCount == 0;
                }
            }
            die "no %!vdelatex:ignore end" unless $nestingCount == 0;
            $_ = $keep . $_;
        }

        # Blank out (keeping the length) every line carrying the marker.
        s/^.*%!vdelatex:ignore this line *\r?$/ clean($&) /mge;
    }

    # Lines from \begin{verbatim} up to the line before \end{verbatim}
    # are not written to the output at all.
    if ( $VerbatimFlag  && /\\end\{verbatim\}/ )   { $VerbatimFlag = '' }
    if ( !$VerbatimFlag && /\\begin\{verbatim\}/ ) { $VerbatimFlag = 1 }
    if ($VerbatimFlag) { s/\$/ /g; next MAINLOOP; }

    # Drop everything between \begin{eqnarray*} and \end{eqnarray*}.
    if (/\\begin\{eqnarray\*\}/) {
        my $keep = "$` ";
        $_ = $';
        while ( !/\\end\{eqnarray\*\}/ ) {
            $_ = <>;
            die "no \\end{eqnarray*}" unless defined $_;
        }
        /\\end\{eqnarray\*\}/;
        $_ = "$keep $'";
    }

    $vdelatexflag = ( index($_, "%!vdelatex:") > -1 );
    if ($vdelatexflag) {
        s/\S+ *%!vdelatex:ignore this word *$//;
    }

    s/%.*$//;    # strip TeX comments

    # Lines that are blank at this point are written out unchanged.
    unless (/^\s*$/) {
        s/\\-//g;    # remove \-
        s/\\[>=_&]/ /g;

        foreach my $ignore (@IgnoreRegs) { s/$ignore/ /g; }

        s/^.*\\kill$/ /gim;
        s/\\\"//g;
        s/\\char"[0-9a-fA-F][0-9a-fA-F]/ /g;
        # Literal braces are escaped: an unescaped '{' in a pattern is
        # deprecated (and fatal in some positions) in newer perls.
        s/\\rule\{[^{}]*\}\{[^{}]*\}//g;
        s/\\(raise|m?kern) *[-0-9.]+(pt|mu)//g;
        s/\\begin\{(?:array|tabular)\}(?:\[[^\]]*\])?\{[rcl|]+\}//g;

        # Command taking one non-spell argument
        s/\\(psfig|(?:new)?pagestyle|bibliographystyle|include)$inBraces//g;
        s/\\(?:alph|arabic|include|input)$inBraces//g;
        s/\\(?:includegraphics)(?:$inBrackets)?$inBraces//g;
        s/\\(?:begin|end|vspace|cite|nocite|citentry|roman)$inBraces/ /g;
        s/\\(?:special)$inBraces/ /g;
        s/\\(?:re)?new(?:counter|command|environment)$inBraces/ /g;
        s/\\hspace\*?\{[^\}]*\}//g;

        foreach my $cmd (@NonSpellCommands) {
            if ( index($_, $cmd) > -1 ) { s/\\$cmd$inBraces//g; }
        }

        s/\\usepackage(?:$inBrackets)?$inBraces//g;
        s/\\makebox(?:$inBrackets){0,2}$inBraces//g;

        # two nonspell arguments
        s/\\(?:fontsize|newtheorem|addtocounter|setcounter|setlength)$inBraces$inBraces/ /g;

        s/ et al\./ /g;
        s/\\documentstyle\[[^\]]+\]/ /g;
        s/\\documentclass($inBrackets)?$inBraces//g;
        s/\\parindent=[0-9]+pt/ /g;
        s/\{[0-9.]+(cm|pt|mm)\}//g;
        s/\\\\(\[[^\]]+\])?/ /g;
        s/\\[a-zA-Z]+/ /g;    # any remaining control word
        s/^\.//g;
        #s/\$.{1,5}\$/ /g;
        #s/~/ /g;
        s/\\'e/'e/g;
        s/\\`e/''BQe/g;
        #s/[{}]/ /g;
        s/\$/ /g;
        s/\\ / /g;
        s/\b/ /g;             # put a space at every word boundary
        #s/\s*(\w+)\s*/ $1 /g;
        #s/^ */ /;
        #s/ *$/ /;
    }

    print I;
}
close(I);

# Spell-check the stripped text; with an allowed-words list, comm -23
# removes the words that appear in that (sorted) list.
if ( defined($AllowedWordsFile) ) {
    system("cat $tmpFile | spell | sort -u | comm -23 - $TmpAFile");
}
else {
    system("cat $tmpFile | spell | sort -u");
}

unlink($TmpAFile);
# program end

# clean($text): return $text with every non-whitespace character replaced
# by a space, so the string length and line breaks are preserved.
sub clean {
    local $_ = shift;
    s/\S/ /g;
    return $_;
}