#!/usr/bin/perl # Copyright 2004 Vlado Keselj www.cs.dal.ca/~vlado sub help { print <<"#EOT" } # Extract Folder MsgId pairs from a list of directories or folders, version $VERSION # # Each pair is extracted to a line of text of the form # "foldermsgid", where is the tab character. # For messages that do not have a Message-Id header the output line # looks like "folder$NoId". The first message in a folder that looks like # "folder internal data" (e.g., created by pine) is ignored. # # Uses: formail Unix utility # # Usage: extract-folder-msgid [switches] [directories] # -h Print help and exit. # -v Print version of the program and exit. #EOT use POSIX qw(tmpnam); use strict; use vars qw( $VERSION $Folder $NoId); #$VERSION = sprintf "%d.%d", q$Revision: 1.2 $ =~ /(\d+)/g; $VERSION = '1.2'; $NoId = ' --NoId--'; use Getopt::Std; use vars qw($opt_v $opt_h ); getopts("vh"); if ($opt_v) { print "$VERSION\n"; exit; } elsif ($opt_h || !@ARGV) { &help(); exit; } $| = 1; &go_recursive(@ARGV); sub go_recursive { while ($#_ > -1) { my $dir = shift; next if -l $dir || !-e $dir; # symbolic link or does not exist: ignore it if (not -d $dir) { # a file &process_folder($dir); next; } local ($_, *DIR); # recursively enter directory opendir(DIR, $dir) || die "can't opendir $dir: $!"; map { /^\.\.?$/ ? '' : (&go_recursive("$dir/$_")) } readdir(DIR); closedir(DIR); } } sub process_folder { my $folder = $Folder = shift; return if -z $folder; # ignore 0-size files my $tmpdir = tmpnam(); mkdir $tmpdir, 0700 or die "can't mkdir $tmpdir: $!"; $ENV{'FILENO'} = '0'; `formail -s sh -c 'cat -> $tmpdir/\$FILENO' < $folder`; my $msg = getfile("$tmpdir/0"); # get the first message if ($msg !~ /^From\ MAILER-DAEMON\ (.|\n)* # Let's guess if this \nSubject:.*FOLDER\ INTERNAL\ DATA # should be ignored (.|\n)*\n\n(?-x:This text is part of the internal format ) (?-x:of your mail folder, and is not\s+a real message\.)/x) { &process_msg($msg) } my @rmfiles = ("$tmpdir/0"); for (my $counter = 1; -e "$tmpdir/$counter"; ++$counter) { $msg = getfile("$tmpdir/$counter"); push @rmfiles, "$tmpdir/$counter"; &process_msg($msg); } unlink(@rmfiles); rmdir $tmpdir or die "rmdir $tmpdir: $!"; } sub process_msg { my $msg = shift; my $hdrs = $msg; $hdrs = $` if $hdrs =~ /\n\n/; if ($hdrs =~ /^Message-Id:(.*)/mi) { my $msgid = $1; my $tmp = $`.$'; die if $tmp =~ /^Message-Id:/i; $msgid =~ s/\s+//g; print "$Folder\t$msgid\n"; } else { print "$Folder\t$NoId\n" } } sub getfile($) { my $f = shift; local *F; open(F, "<$f") or die "getfile:cannot open $f:$!"; my @r = ; close(F); return wantarray ? @r : join ('', @r); }