1442 lines
52 KiB
Perl
Executable File
1442 lines
52 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
#
|
|
# $Id$
|
|
#
|
|
# mb2md-3.20.pl Converts Mbox mailboxes to Maildir format.
|
|
#
|
|
# Public domain.
|
|
#
|
|
# currently maintained by:
|
|
# Juri Haberland <juri@koschikode.com>
|
|
# initially wrote by:
|
|
# Robin Whittle
|
|
#
|
|
# This script's web abode is http://batleth.sapienti-sat.org/projects/mb2md/ .
|
|
# For a changelog see http://batleth.sapienti-sat.org/projects/mb2md/changelog.txt
|
|
#
|
|
# The Mbox -> Maildir inner loop is based on qmail's script mbox2maildir, which
|
|
# was kludged by Ivan Kohler in 1997 from convertandcreate (public domain)
|
|
# by Russel Nelson. Both these convert a single mailspool file.
|
|
#
|
|
# The qmail distribution has a maildir2mbox.c program.
|
|
#
|
|
# What is does:
|
|
# =============
|
|
#
|
|
# Reads a directory full of Mbox format mailboxes and creates a set of
|
|
# Maildir format mailboxes. Some details of this are to suit Courier
|
|
# IMAP's naming conventions for Maildir mailboxes.
|
|
#
|
|
# http://www.inter7.com/courierimap/
|
|
#
|
|
# This is intended to automate the conversion of the old
|
|
# /var/spool/mail/blah file - with one call of this script - and to
|
|
# convert one or more mailboxes in a specifed directory with separate
|
|
# calls with other command line arguments.
|
|
#
|
|
# Run this as the user - in these examples "blah".
|
|
|
|
# This version supports conversion of:
|
|
#
|
|
# Date The date-time in the "From " line of the message in the
|
|
# Mbox format is the date when the message was *received*.
|
|
# This is transformed into the date-time of the file which
|
|
# contains the message in the Maildir mailbox.
|
|
#
|
|
# This relies on the Date::Parse perl module and the utime
|
|
# perl function.
|
|
#
|
|
# The script tries to cope with errant forms of the
|
|
# Mbox "From " line which it may encounter, but if
|
|
# there is something really screwy in a From line,
|
|
# then perhaps the script will fail when "touch"
|
|
# is given an invalid date. Please report the
|
|
# exact nature of any such "From " line!
|
|
#
|
|
#
|
|
# Flagged
|
|
# Replied
|
|
# Read = Seen
|
|
# Tagged for Deletion
|
|
#
|
|
# In the Mbox message, flags for these are found in the
|
|
# "Status: N" or "X-Status: N" headers, where "N" is 0
|
|
# or more of the following characters in the left column.
|
|
#
|
|
# They are converted to characters in the right column,
|
|
# which become the last characters of the file name,
|
|
# following the ":2," which indicates IMAP message status.
|
|
#
|
|
#
|
|
# F -> F Flagged
|
|
# A -> R Replied
|
|
# R -> S Read = Seen
|
|
# D -> T Tagged for Deletion (Trash)
|
|
#
|
|
# This is based on the work of Philip Mak who wrote a
|
|
# completely separate Mbox -> Maildir converter called
|
|
# perfect_maildir and posted it to the Mutt-users mailing
|
|
# list on 25 December 2001:
|
|
#
|
|
# http://www.mail-archive.com/mutt-users@mutt.org/msg21872.html
|
|
#
|
|
# Michael Best originally integrated those changes into mb2md.
|
|
#
|
|
#
|
|
# In addition, the names of the message files in the Maildir are of a
|
|
# regular length and are of the form:
|
|
#
|
|
# 7654321.000123.mbox:2,xxx
|
|
#
|
|
# Where "7654321" is the Unix time in seconds when the script was
|
|
# run and "000123" is the six zeroes padded message number as
|
|
# messages are converted from the Mbox file. "xxx" represents zero or
|
|
# more of the above flags F, R, S or T.
|
|
#
|
|
#
|
|
# ---------------------------------------------------------------------
|
|
#
|
|
#
|
|
# USAGE
|
|
# =====
|
|
#
|
|
# Run this as the user of the mailboxes, not as root.
|
|
#
|
|
#
|
|
# mb2md -h
|
|
# mb2md [-c] -m [-d destdir]
|
|
# mb2md [-c] -s sourcefile [-d destdir]
|
|
# mb2md [-c] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension]
|
|
#
|
|
# -c use the Content-Length: headers (if present) to find the
|
|
# beginning of the next message
|
|
# Use with caution! Results may be unreliable. I recommend to do
|
|
# a run without "-c" first and only use it if you are certain,
|
|
# that the mbox in question really needs the "-c" option
|
|
#
|
|
# -m If this is used then the source will
|
|
# be the single mailbox at /var/spool/mail/blah for
|
|
# user blah and the destination mailbox will be the
|
|
# "destdir" mailbox itself.
|
|
#
|
|
#
|
|
# -s source Directory or file relative to the user's home directory,
|
|
# which is where the the "somefolders" directories are located.
|
|
# Or if starting with a "/" it is taken as a
|
|
# absolute path, e.g. /mnt/oldmail/user
|
|
#
|
|
# or
|
|
#
|
|
# A single mbox file which will be converted to
|
|
# the destdir.
|
|
#
|
|
# -R If defined, do not skip directories found in a mailbox
|
|
# directory, but runs recursively into each of them,
|
|
# creating all wanted folders in Maildir.
|
|
# Incompatible with '-f'
|
|
#
|
|
# -f somefolder Directories, relative to "sourcedir" where the Mbox files
|
|
# are. All mailboxes in the "sourcedir"
|
|
# directory will be converted and placed in the
|
|
# "destdir" directory. (Typically the Inbox directory
|
|
# which in this instance is also functioning as a
|
|
# folder for other mailboxes.)
|
|
#
|
|
# The "somefolder" directory
|
|
# name will be encoded into the new mailboxes' names.
|
|
# See the examples below.
|
|
#
|
|
# This does not save an UW IMAP dummy message file
|
|
# at the start of the Mbox file. Small changes
|
|
# in the code could adapt it for looking for
|
|
# other distinctive patterns of dummy messages too.
|
|
#
|
|
# Don't let the source directory you give as "somefolders"
|
|
# contain any "."s in its name, unless you want to
|
|
# create subfolders from the IMAP user's point of
|
|
# view. See the example below.
|
|
#
|
|
# Incompatible with '-f'
|
|
#
|
|
#
|
|
# -d destdir Directory where the Maildir format directories will be created.
|
|
# If not given, then the destination will be ~/Maildir .
|
|
# Typically, this is what the IMAP server sees as the
|
|
# Inbox and the folder for all user mailboxes.
|
|
# If this begins with a '/' the path is considered to be
|
|
# absolute, otherwise it is relative to the users
|
|
# home directory.
|
|
#
|
|
# -r strip_ext If defined this extension will be stripped from
|
|
# the original mailbox file name before creating
|
|
# the corresponding maildir. The extension must be
|
|
# given without the leading dot ("."). See the example below.
|
|
#
|
|
# -l WU-file File containing the list of subscribed folders. If
|
|
# migrating from WU-IMAP the list of subscribed folders will
|
|
# be found in the file called .mailboxlist in the users
|
|
# home directory. This will convert all subscribed folders
|
|
# for a single user:
|
|
# /bin/mb2md -s mail -l .mailboxlist -R -d Maildir
|
|
# and for all users in a directory as root you can do the
|
|
# following:
|
|
# for i in *; do echo $i;su - $i -c "/bin/mb2md -s mail -l .mailboxlist -R -d Maildir";done
|
|
#
|
|
#
|
|
# Example
|
|
# =======
|
|
#
|
|
# We have a bunch of directories of Mbox mailboxes located at
|
|
# /home/blah/oldmail/
|
|
#
|
|
# /home/blah/oldmail/fffff
|
|
# /home/blah/oldmail/ggggg
|
|
# /home/blah/oldmail/xxx/aaaa
|
|
# /home/blah/oldmail/xxx/bbbb
|
|
# /home/blah/oldmail/xxx/cccc
|
|
# /home/blah/oldmail/xxx/dddd
|
|
# /home/blah/oldmail/yyyy/huey
|
|
# /home/blah/oldmail/yyyy/duey
|
|
# /home/blah/oldmail/yyyy/louie
|
|
#
|
|
# With the UW IMAP server, fffff and ggggg would have appeared in the root
|
|
# of this mail server, along with the Inbox. aaaa, bbbb etc, would have
|
|
# appeared in a folder called xxx from that root, and xxx was just a folder
|
|
# not a mailbox for storing messages.
|
|
#
|
|
# We also have the mailspool Inbox at:
|
|
#
|
|
# /var/spool/mail/blah
|
|
#
|
|
#
|
|
# To convert these, as user blah, we give the first command:
|
|
#
|
|
# mb2md -m
|
|
#
|
|
# The main Maildir directory will be created if it does not exist.
|
|
# (This is true of any argument options, not just "-m".)
|
|
#
|
|
# /home/blah/Maildir/
|
|
#
|
|
# It has the following subdirectories:
|
|
#
|
|
# /home/blah/Maildir/tmp/
|
|
# /home/blah/Maildir/new/
|
|
# /home/blah/Maildir/cur/
|
|
#
|
|
# Then /var/spool/blah file is read, split into individual files and
|
|
# written into /home/blah/Maildir/cur/ .
|
|
#
|
|
# Now we give the second command:
|
|
#
|
|
# mb2md -s oldmail -R
|
|
#
|
|
# This reads recursively all Mbox mailboxes and creates:
|
|
#
|
|
# /home/blah/Maildir/.fffff/
|
|
# /home/blah/Maildir/.ggggg/
|
|
# /home/blah/Maildir/.xxx/
|
|
# /home/blah/Maildir/.xxx.aaaa/
|
|
# /home/blah/Maildir/.xxx.bbbb/
|
|
# /home/blah/Maildir/.xxx.cccc/
|
|
# /home/blah/Maildir/.xxx.aaaa/
|
|
# /home/blah/Maildir/.yyyy/
|
|
# /home/blah/Maildir/.yyyy.huey/
|
|
# /home/blah/Maildir/.yyyy.duey/
|
|
# /home/blah/Maildir/.yyyy.louie/
|
|
#
|
|
# The result, from the IMAP client's point of view is:
|
|
#
|
|
# Inbox -----------------
|
|
# |
|
|
# | fffff -----------
|
|
# | ggggg -----------
|
|
# |
|
|
# - xxx -------------
|
|
# | | aaaa --------
|
|
# | | bbbb --------
|
|
# | | cccc --------
|
|
# | | dddd --------
|
|
# |
|
|
# - yyyy ------------
|
|
# | huey -------
|
|
# | duey -------
|
|
# | louie ------
|
|
#
|
|
# Note that although ~/Maildir/.xxx/ and ~/Maildir/.yyyy may appear
|
|
# as folders to the IMAP client the above commands to not generate
|
|
# any Maildir folders of these names. These are simply elements
|
|
# of the names of other Maildir directories. (if you used '-R', they
|
|
# whill be able to act as normal folders, containing messages AND folders)
|
|
#
|
|
# With a separate run of this script, using just the "-s" option
|
|
# without "-f" nor "-R", it would be possible to create mailboxes which
|
|
# appear at the same location as far as the IMAP client is
|
|
# concerned. By having Mbox mailboxes in some directory:
|
|
# ~/oldmail/nnn/ of the form:
|
|
#
|
|
# /home/blah/oldmail/nn/xxxx
|
|
# /home/blah/oldmail/nn/yyyyy
|
|
#
|
|
# then the command:
|
|
#
|
|
# mb2md -s oldmail/nn
|
|
#
|
|
# will create two new Maildirs:
|
|
#
|
|
# /home/blah/Maildir/.xxx/
|
|
# /home/blah/Maildir/.yyyy/
|
|
#
|
|
# Then what used to be the xxx and yyyy folders now function as
|
|
# mailboxes too. Netscape 4.77 needed to be put to sleep and given ECT
|
|
# to recognise this - deleting the contents of (Win2k example):
|
|
#
|
|
# C:\Program Files\Netscape\Users\uu\ImapMail\aaa.bbb.ccc\
|
|
#
|
|
# where "uu" is the user and "aaa.bbb.ccc" is the IMAP server
|
|
#
|
|
# I often find that deleting all this directory's contents, except
|
|
# "rules.dat", forces Netscape back to reality after its IMAP innards
|
|
# have become twisted. Then maybe use File > Subscribe - but this
|
|
# seems incapable of subscribing to folders.
|
|
#
|
|
# For Outlook Express, select the mail server, then click the
|
|
# "IMAP Folders" button and use "Reset list". In the "All"
|
|
# window, select the mailboxes you want to see in normal
|
|
# usage.
|
|
#
|
|
#
|
|
# This script did not recurse subdirectories or delete old mailboxes, before addition of the '-R' parameter :)
|
|
#
|
|
# Be sure not to be accessing the Mbox mailboxes while running this
|
|
# script. It does not attempt to lock them. Likewise, don't run two
|
|
# copies of this script either.
|
|
#
|
|
#
|
|
# Trickier usage . . .
|
|
# ====================
|
|
#
|
|
# If you have a bunch of mailboxes in a directory ~/oldmail/doors/
|
|
# and you want them to appear in folders such as:
|
|
#
|
|
# ~/Maildir/.music.bands.doors.Jim
|
|
# ~/Maildir/.music.bands.doors.John
|
|
#
|
|
# etc. so they appear in an IMAP folder:
|
|
#
|
|
# Inbox -----------------
|
|
# | music
|
|
# | bands
|
|
# | doors
|
|
# | Jim
|
|
# | John
|
|
# | Robbie
|
|
# | Ray
|
|
#
|
|
# Then you could rename the source directory to:
|
|
#
|
|
# ~/oldmail/music.bands.doors/
|
|
#
|
|
# then use:
|
|
#
|
|
# mb2md -s oldmail -f music.bands.doors
|
|
#
|
|
#
|
|
# Or simply use '-R' switch with:
|
|
# mb2md -s oldmail -R
|
|
#
|
|
#
|
|
# Stripping mailbox extensions:
|
|
# =============================
|
|
#
|
|
# If you want to convert mailboxes that came for example from
|
|
# a Windows box than you might want to strip the extension of
|
|
# the mailbox name so that it won't create a subfolder in your
|
|
# mail clients view.
|
|
#
|
|
# Example:
|
|
# You have several mailboxes named Trash.mbx, Sent.mbx, Drafts.mbx
|
|
# If you don't strip the extension "mbx" you will get the following
|
|
# hierarchy:
|
|
#
|
|
# Inbox
|
|
# |
|
|
# - Trash
|
|
# | | mbx
|
|
# |
|
|
# - Sent
|
|
# | | mbx
|
|
# |
|
|
# - Drafts
|
|
# | mbx
|
|
#
|
|
# This is more than ugly!
|
|
# Just use:
|
|
# mb2md -s oldmail -r mbx
|
|
#
|
|
# Note: don't specify the dot! It will be stripped off
|
|
# automagically ;)
|
|
#
|
|
#------------------------------------------------------------------------------
|
|
|
|
use strict;
|
|
use Getopt::Std;
|
|
use Date::Parse;
|
|
use IO::Handle;
|
|
use Fcntl;
|
|
|
|
my $DEBUG = ''; # set it to 1 to enable debug messages
|
|
|
|
# print the usage message
|
|
sub usage() {
|
|
print "Usage:\n";
|
|
print " mb2md -h\n";
|
|
print " mb2md [-c] -m [-d destdir]\n";
|
|
print " mb2md [-c] -s sourcefile [-d destdir]\n";
|
|
die " mb2md [-c] -s sourcedir [-l wu-mailboxlist] [-R|-f somefolder] [-d destdir] [-r strip_extension]\n";
|
|
}
|
|
# get options
|
|
my %opts;
|
|
getopts('d:f:chms:r:l:R', \%opts) || usage();
|
|
usage() if ( defined($opts{h})
|
|
|| (!defined($opts{m}) && !defined($opts{s})) );
|
|
|
|
# Get uid, username and home dir
|
|
my ($name, $passwd, $uid, $gid, $quota, $comment, $gcos, $homedir, $shell) = getpwuid($<);
|
|
|
|
# Get arguments and determine source
|
|
# and target directories.
|
|
my $mbroot = undef; # this is the base directory for the mboxes
|
|
my $mbdir = undef; # this is an mbox dir relative to the $mbroot
|
|
my $mbfile = undef; # this is an mbox file
|
|
my $dest = undef;
|
|
my $strip_ext = undef;
|
|
my $use_cl = undef; # defines whether we use the Content-Length: header if present
|
|
|
|
# if option "-c" is given, we use the Content-Length: header if present
|
|
# dangerous! may be unreliable, as the whole CL stuff is a bad idea
|
|
if (defined($opts{c}))
|
|
{
|
|
$use_cl = 1;
|
|
} else {
|
|
$use_cl = 0;
|
|
}
|
|
|
|
# first, if the user has gone the -m option
|
|
# we simply convert their mailfile
|
|
if (defined($opts{m}))
|
|
{
|
|
if (defined($ENV{'MAIL'})) {
|
|
$mbfile = $ENV{'MAIL'};
|
|
} elsif ( -f "/var/spool/mail/$name" ) {
|
|
$mbfile = "/var/spool/mail/$name"
|
|
} elsif ( -f "/var/mail/$name" ) {
|
|
$mbfile = "/var/mail/$name"
|
|
} else {
|
|
die("I searched \$MAIL, /var/spool/mail/$name and /var/mail/$name, ".
|
|
"but I couldn't find your mail spool file - ");
|
|
}
|
|
}
|
|
# see if the user has specified a source directory
|
|
elsif (defined($opts{s}))
|
|
{
|
|
# if opts{s} doesn't start with a "/" then
|
|
# it is a subdir of the users $home
|
|
# if it does start with a "/" then
|
|
# let's take $mbroot as a absolut path
|
|
$opts{s} = "$homedir/$opts{s}" if ($opts{s} !~ /^\//);
|
|
|
|
# check if the given source is a mbox file
|
|
if (-f $opts{s})
|
|
{
|
|
$mbfile = $opts{s};
|
|
}
|
|
|
|
# otherwise check if it is a directory
|
|
elsif (-d $opts{s})
|
|
{
|
|
$mbroot = $opts{s};
|
|
# get rid of trailing /'s
|
|
$mbroot =~ s/\/$//;
|
|
|
|
# check if we have a specified sub directory,
|
|
# otherwise the sub directory is '.'
|
|
if (defined($opts{f}))
|
|
{
|
|
$mbdir = $opts{f};
|
|
# get rid of trailing /'s
|
|
$mbdir =~ s/\/$//;
|
|
}
|
|
}
|
|
|
|
# otherwise we have an error
|
|
else
|
|
{
|
|
die("Fatal: Source is not an mbox file or a directory!\n");
|
|
}
|
|
}
|
|
|
|
|
|
# get the dest
|
|
defined($opts{d}) && ($dest = $opts{d}) || ($dest = "Maildir");
|
|
# see if we have anything to strip
|
|
defined($opts{r}) && ($strip_ext = $opts{r});
|
|
# No '-f' with '-R'
|
|
if((defined($opts{R}))&&(defined($opts{f}))) { die "No recursion with \"-f\"";}
|
|
# Get list of folders
|
|
my @flist;
|
|
if(defined($opts{l}))
|
|
{
|
|
open (LIST,$opts{l}) or die "Could not open mailbox list $opts{l}: $!";
|
|
@flist=<LIST>;
|
|
close LIST;
|
|
}
|
|
|
|
# if the destination is relative to the home dir,
|
|
# check that the home dir exists
|
|
die("Fatal: home dir $homedir doesn't exist.\n") if ($dest !~ /^\// && ! -e $homedir);
|
|
|
|
#
|
|
# form the destination value
|
|
# slap the home dir on the front of the dest if the dest does not begin
|
|
# with a '/'
|
|
$dest = "$homedir/$dest" if ($dest !~ /^\//);
|
|
# get rid of trailing /'s
|
|
$dest =~ s/\/$//;
|
|
|
|
|
|
# Count the number of mailboxes, or
|
|
# at least files, we found.
|
|
my $mailboxcount = 0;
|
|
|
|
# Since we'll be making sub directories of the main
|
|
# Maildir, we need to make sure that the main maildir
|
|
# exists
|
|
&maildirmake($dest);
|
|
|
|
# Now we do different things depending on whether we convert one mbox
|
|
# file or a directory of mbox files
|
|
if (defined($mbfile))
|
|
{
|
|
if (!isamailboxfile($mbfile))
|
|
{
|
|
print "Skipping $mbfile: not a mbox file\n";
|
|
}
|
|
else
|
|
{
|
|
print "Converting $mbfile to maildir: $dest\n";
|
|
# this is easy, we just run the convert function
|
|
&convert($mbfile, $dest);
|
|
}
|
|
}
|
|
# if '-f' was used ...
|
|
elsif (defined($mbdir))
|
|
{
|
|
print "Converting mboxdir/mbdir: $mbroot/$mbdir to maildir: $dest/\n";
|
|
|
|
# Now set our source directory
|
|
my $sourcedir = "$mbroot/$mbdir";
|
|
|
|
# check that the directory we are supposed to be finding mbox
|
|
# files in, exists and is a directory
|
|
-e $sourcedir or die("Fatal: MBDIR directory $sourcedir/ does not exist.\n");
|
|
-d $sourcedir or die("Fatal: MBDIR $sourcedir is not a directory.\n");
|
|
|
|
|
|
&convertit($mbdir,"");
|
|
}
|
|
# Else, let's work in $mbroot
|
|
else
|
|
{
|
|
opendir(SDIR, $mbroot)
|
|
or die("Fatal: Cannot open source directory $mbroot/ \n");
|
|
|
|
|
|
while (my $sourcefile = readdir(SDIR))
|
|
{
|
|
if (-d "$mbroot/$sourcefile") {
|
|
# Recurse only if requested (to be changed ?)
|
|
if (defined($opts{R})) {
|
|
print "convertit($sourcefile,\"\")\n" if ($DEBUG);
|
|
&convertit($sourcefile,"");
|
|
} else {
|
|
print("$sourcefile is a directory, but '-R' was not used... skipping\n");
|
|
}
|
|
}
|
|
elsif (!-f "$mbroot/$sourcefile")
|
|
{
|
|
print "Skipping $mbroot/$sourcefile : not a file nor a dir\n";
|
|
next;
|
|
}
|
|
elsif (!isamailboxfile("$mbroot/$sourcefile"))
|
|
{
|
|
print "Skipping $mbroot/$sourcefile : not a mbox file\n";
|
|
next;
|
|
}
|
|
else
|
|
{
|
|
&convertit($sourcefile,"");
|
|
}
|
|
} # end of "while ($sfile = readdir(SDIR))" loop.
|
|
closedir(SDIR);
|
|
printf("$mailboxcount files processed.\n") if ($DEBUG);
|
|
}
|
|
#
|
|
|
|
exit 0;
|
|
|
|
# My debbugging placeholder I can put somewhere to show how far the script ran.
|
|
# die("So far so good.\n\n");
|
|
|
|
# The isamailboxfile function
|
|
# ----------------------
|
|
#
|
|
# Here we check if the file is a mailbox file, not an address-book or
|
|
# something else.
|
|
# If file is empty, we say it is a mbox, to create it empty.
|
|
#
|
|
# Returns 1 if file is said mbox, 0 else.
|
|
sub isamailboxfile {
|
|
my ($mbxfile) = @_;
|
|
return 1 if(-z $mbxfile);
|
|
sysopen(MBXFILE, "$mbxfile", O_RDONLY) or die "Could not open $mbxfile ! \n";
|
|
while(<MBXFILE>) {
|
|
if (/^From/) {
|
|
close(MBXFILE);
|
|
return 1;
|
|
}
|
|
else {
|
|
close(MBXFILE);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
# The convertit function
|
|
# -----------------------
|
|
#
|
|
# This function creates all subdirs in maildir, and calls convert()
|
|
# for each mbox file.
|
|
# Yes, it becomes the 'main loop' :)
|
|
sub convertit
|
|
{
|
|
# Get subdir as argument
|
|
my ($dir,$oldpath) = @_;
|
|
|
|
$oldpath =~ s/\/\///;
|
|
|
|
# Skip files beginning with '.' since they are
|
|
# not normally mbox files nor dirs (includes '.' and '..')
|
|
if ($dir =~ /^\./)
|
|
{
|
|
print "Skipping $dir : name begins with a '.'\n" if ($DEBUG);
|
|
return;
|
|
}
|
|
my $destinationdir = $dir;
|
|
my $temppath = $oldpath;
|
|
|
|
# We don't want to have .'s in the $targetfile file
|
|
# name because they will become directories in the
|
|
# Maildir. Therefore we convert them to _'s
|
|
$temppath =~ s/\./\_/g;
|
|
$destinationdir =~ s/\./\_/g;
|
|
|
|
# Appending $oldpath => path is only missing $dest
|
|
$destinationdir = "$temppath.$destinationdir";
|
|
|
|
# Converting '/' to '.' in $destinationdir
|
|
$destinationdir =~s/\/+/\./g;
|
|
|
|
# source dir
|
|
my $srcdir="$mbroot/$oldpath/$dir";
|
|
|
|
printf("convertit(): Converting $dir in $mbroot/$oldpath to $dest/$destinationdir\n") if ($DEBUG);
|
|
&maildirmake("$dest/$destinationdir");
|
|
print("destination = $destinationdir\n") if ($DEBUG);
|
|
if (-d $srcdir) {
|
|
opendir(SUBDIR, "$srcdir") or die "can't open $srcdir !\n";
|
|
my @subdirlist=readdir(SUBDIR);
|
|
closedir(SUBDIR);
|
|
foreach (@subdirlist) {
|
|
next if (/^\.+$/);
|
|
print("Sub: $_\n");
|
|
print("convertit($_,\"$oldpath/$dir\")\n") if ($DEBUG);
|
|
&convertit($_,"$oldpath/$dir");
|
|
}
|
|
} else {
|
|
# Source file verifs ....
|
|
#
|
|
return if(defined($opts{l}) && !inlist("$oldpath/$dir",@flist));
|
|
|
|
if (!isamailboxfile("$mbroot/$oldpath/$dir"))
|
|
{
|
|
print "Skipping $dir (is not mbox)\n" if ($DEBUG);
|
|
next;
|
|
}
|
|
|
|
# target file verifs...
|
|
#
|
|
# if $strip_extension is defined,
|
|
# strip it off the $targetfile
|
|
defined($strip_ext) && ($destinationdir =~ s/\.$strip_ext$//);
|
|
&convert("$mbroot/$oldpath/$dir","$dest/$destinationdir");
|
|
$mailboxcount++;
|
|
}
|
|
}
|
|
# The maildirmake function
|
|
# ------------------------
|
|
#
|
|
# It does the same thing that the maildirmake binary that
|
|
# comes with courier-imap distribution
|
|
#
|
|
sub maildirmake
|
|
{
|
|
foreach(@_) {
|
|
-d $_ or mkdir $_,0700 or die("Fatal: Directory $_ doesn't exist and can't be created.\n");
|
|
|
|
-d "$_/tmp" or mkdir("$_/tmp",0700) or die("Fatal: Unable to make $_/tmp/ subdirectory.\n");
|
|
-d "$_/new" or mkdir("$_/new",0700) or die("Fatal: Unable to make $_/new/ subdirectory.\n");
|
|
-d "$_/cur" or mkdir("$_/cur",0700) or die("Fatal: Unable to make $_/cur/ subdirectory.\n");
|
|
}
|
|
}
|
|
|
|
# The inlist function
|
|
# ------------------------
|
|
#
|
|
# It checks that the folder to be converted is in the list of subscribed
|
|
# folders in WU-IMAP
|
|
#
|
|
sub inlist
|
|
{
|
|
my ($file,@flist) = @_;
|
|
my $valid = 0;
|
|
# Get rid of the first / if any
|
|
$file =~ s/^\///;
|
|
foreach my $folder (@flist) {
|
|
chomp $folder;
|
|
if ($file eq $folder) {
|
|
$valid = 1;
|
|
last;
|
|
}
|
|
}
|
|
if (!$valid) {
|
|
print "$file is not in list\n";
|
|
}
|
|
else {
|
|
print "$file is in list\n";
|
|
}
|
|
|
|
return $valid;
|
|
}
|
|
|
|
#
|
|
|
|
# The convert function
|
|
# ---------------------
|
|
#
|
|
# This function does the down and dirty work of
|
|
# actually converting the mbox to a maildir
|
|
#
|
|
sub convert
|
|
{
|
|
# get the source and destination as arguments
|
|
my ($mbox, $maildir) = @_;
|
|
|
|
printf("Source Mbox is $mbox\n") if ($DEBUG);
|
|
printf("Target Maildir is $maildir \n") if ($DEBUG);
|
|
|
|
# create the directories for the new maildir
|
|
#
|
|
# if it is the root maildir (ie. converting the inbox)
|
|
# these already exist but thats not a big issue
|
|
|
|
&maildirmake($maildir);
|
|
|
|
# Change to the target mailbox directory.
|
|
|
|
chdir "$maildir" ;
|
|
|
|
# Converts a Mbox to multiple files
|
|
# in a Maildir.
|
|
# This is adapted from mbox2maildir.
|
|
#
|
|
# Open the Mbox mailbox file.
|
|
|
|
|
|
if (sysopen(MBOX, "$mbox", O_RDONLY))
|
|
{
|
|
#printf("Converting Mbox $mbox . . . \n");
|
|
}
|
|
else
|
|
{
|
|
die("Fatal: unable to open input mailbox file: $mbox ! \n");
|
|
}
|
|
|
|
# This loop scans the input mailbox for
|
|
# a line starting with "From ". The
|
|
# "^" before it is pattern-matching
|
|
# lingo for it being at the start of a
|
|
# line.
|
|
#
|
|
# Each email in Mbox mailbox starts
|
|
# with such a line, which is why any
|
|
# such line in the body of the email
|
|
# has to have a ">" put in front of it.
|
|
#
|
|
# This is not required in a Maildir
|
|
# mailbox, and some majik below
|
|
# finds any such quoted "> From"s and
|
|
# gets rid of the "> " quote.
|
|
#
|
|
# Each email is put in a file
|
|
# in the cur/ subdirectory with a
|
|
# name of the form:
|
|
#
|
|
# nnnnnnnnn.cccc.mbox:2,XXXX
|
|
#
|
|
# where:
|
|
# "nnnnnnnnn" is the Unix time since
|
|
# 1970 when this script started
|
|
# running, incremented by 1 for
|
|
# every email. This is to ensure
|
|
# unique names for each message
|
|
# file.
|
|
#
|
|
# ".cccc" is the message count of
|
|
# messages from this mbox.
|
|
#
|
|
# ".mbox" is just to indicate that
|
|
# this message was converted from
|
|
# an Mbox mailbox.
|
|
#
|
|
# ":2," is the start of potentially
|
|
# multiple IMAP flag characters
|
|
# "XXXX", but may be followed by
|
|
# nothing.
|
|
#
|
|
# This is sort-of compliant with
|
|
# the Maildir naming conventions
|
|
# specified at:
|
|
#
|
|
# http://www.qmail.org/man/man5/maildir.html
|
|
#
|
|
# This approach does not involve the
|
|
# process ID or the hostname, but it is
|
|
# probably good enough.
|
|
#
|
|
# When the IMAP server looks at this
|
|
# mailbox, it will move the files to
|
|
# the cur/ directory and change their
|
|
# names as it pleases. In the case
|
|
# of Courier IMAP, the names will
|
|
# become like:
|
|
#
|
|
# 995096541.25351.mbox:2,S
|
|
#
|
|
# with 25351 being Courier IMAP's
|
|
# process ID. The :2, is the start
|
|
# of the flags, and the "S" means
|
|
# that this one has been seen by
|
|
# the user. (But is this the same
|
|
# meaning as the user actually
|
|
# having opened the message to see
|
|
# its contents, rather than just the
|
|
# IMAP server having been asked to
|
|
# list the message's Subject etc.
|
|
# so the client could list it in the
|
|
# visible Inbox?)
|
|
#
|
|
# This contrasts with a message
|
|
# created by Courier IMAP, say with
|
|
# a message copy, which is like:
|
|
#
|
|
# 995096541.25351.zair,S=14285:2,S
|
|
#
|
|
# where ",S=14285" is the size of the
|
|
# message in bytes.
|
|
#
|
|
# Courier Maildrop's names are similar
|
|
# but lack the ":2,XXXX" flags . . .
|
|
# except for my modified Maildrop
|
|
# which can deliver them with a
|
|
# ":2,T" - flagged for deletion.
|
|
#
|
|
# I have extended the logic of the
|
|
# per-message inner loop to stop
|
|
# saving a file for a message with:
|
|
#
|
|
# Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA
|
|
#
|
|
# This is the dummy message, always
|
|
# at the start of an Mbox format
|
|
# mailbox file - and is put there
|
|
# by UW IMAPD. Since quite a few
|
|
# people will use this for
|
|
# converting from a UW system,
|
|
# I figure it is worth it.
|
|
#
|
|
# I will not save any such message
|
|
# file for the dummy message.
|
|
#
|
|
# Plan
|
|
# ----
|
|
#
|
|
# We want to read the entire Mbox file, whilst
|
|
# going through a loop for each message we find.
|
|
#
|
|
# We want to read all the headers of the message,
|
|
# starting with the "From " line. For that "From "
|
|
# line we want to get a date.
|
|
#
|
|
# For all other header lines, we want to store them
|
|
# in $headers whilst parsing them to find:
|
|
#
|
|
# 1 - Any flags in the "Status: " or "X-Status: " or
|
|
# "X-Mozilla-Status: " lines.
|
|
#
|
|
# 2 - A subject line indicating this is the dummy message
|
|
# at the start (typically, but not necessarily) of
|
|
# the Mbox.
|
|
#
|
|
# Once we reach the end of the headers, we will crunch any
|
|
# flags we found to create a file name. Then, unless this is
|
|
# the dummy message we create that file and write all the
|
|
# headers to it.
|
|
#
|
|
# Then we continue reading the Mbox, converting ">From " to
|
|
# "From " and writing it to the file, until we reach one of:
|
|
#
|
|
# 1 - Another "From " line (indicating the start of another
|
|
# message).
|
|
#
|
|
# or
|
|
#
|
|
# 2 - The end of the Mbox.
|
|
#
|
|
# In the former case, which we detect at the start of the loop
|
|
# we need to close the file and touch it to alter its date-time.
|
|
#
|
|
# In the later case, we also need to close the file and touch
|
|
# it to alter its date-time - but this is beyond the end of the
|
|
# loop.
|
|
|
|
|
|
# Variables
|
|
# ---------
|
|
|
|
my $messagecount = 0;
|
|
|
|
# For generating unique filenames for
|
|
# each message. Initialise it here with
|
|
# numeric time in seconds since 1970.
|
|
my $unique = time;
|
|
|
|
# Name of message file to delete if we found that
|
|
# it was created by reading the Mbox dummy message.
|
|
|
|
my $deletedummy = '';
|
|
|
|
# To store the complete "From (address) (date-time)
|
|
# which delineates the start of each message
|
|
# in the Mbox
|
|
my $fromline = '';
|
|
|
|
|
|
# Set to 1 when we are reading the header lines,
|
|
# including the "From " line.
|
|
#
|
|
# 0 means we are reading the message body and looking
|
|
# for another "From " line.
|
|
|
|
my $inheaders = 0;
|
|
|
|
# Variable to hold all headers (apart from
|
|
# the first line "From ...." which is not
|
|
# part of the message itself.
|
|
my $headers = '';
|
|
|
|
# Variable to hold the accumulated characters
|
|
# we find in header lines of the type:
|
|
#
|
|
# Status:
|
|
# X-Status:
|
|
# X-Mozilla-Status:
|
|
# X-Evolution:
|
|
my $flags = '';
|
|
|
|
# To build the file name for the message in.
|
|
my $messagefn = '';
|
|
|
|
|
|
# The date string from the "From " line of each
|
|
# message will be written here - and used by
|
|
# touch to alter the date-time of each message
|
|
# file. Put non-date text here to make it
|
|
# spit the dummy if my code fails to find a
|
|
# date to write into this.
|
|
|
|
my $receivedate = 'Bogus';
|
|
|
|
# The subject of the message
|
|
my $subject = '';
|
|
|
|
my $previous_line_was_empty = 1;
|
|
|
|
# We record the message start line here, for error
|
|
# reporting.
|
|
my $startline;
|
|
|
|
# If defined, we use this as the number of bytes in the
|
|
# message body rather than looking for a /^From / line.
|
|
my $contentlength;
|
|
|
|
# A From lines can either occur as the first
|
|
# line of a file, or after an empty line.
|
|
# Most mail systems will quote all From lines
|
|
# appearing in the message, but some will only
|
|
# do it when necessary.
|
|
# Since we initialise the variable to true,
|
|
# we don't need to check for beginning of file.
|
|
|
|
while(<MBOX>)
|
|
{
|
|
# Prevent an "Malformed UTF-8 character" message
|
|
open(OLDERR, ">&STDERR");
|
|
open(STDERR, ">/dev/null");
|
|
|
|
# exchange possible Windows EOL (CRLF) with Unix EOL (LF)
|
|
$_ =~ s/\r\n$/\n/;
|
|
|
|
if ( /^From /
|
|
&& $previous_line_was_empty
|
|
&& (!defined $contentlength)
|
|
) {
|
|
# We are reading the "From " line which has an
|
|
# email address followed by a receive date.
|
|
# Turn on the $inheaders flag until we reach
|
|
# the end of the headers.
|
|
$inheaders = 1;
|
|
|
|
# record the message start line
|
|
$startline = $.;
|
|
|
|
# If this is not the first run through the loop
|
|
# then this means we have already been working
|
|
# on a message.
|
|
if ($messagecount > 0)
|
|
{
|
|
# If so, then close that message file and then
|
|
# use utime to change its date-time.
|
|
#
|
|
# Note this code should be duplicated to do
|
|
# the same thing at the end of the while loop
|
|
# since we must close and touch the final message
|
|
# file we were writing when we hit the end of the
|
|
# Mbox file.
|
|
close (OUT);
|
|
|
|
if ($messagefn ne '') {
|
|
|
|
my $t = str2time($receivedate);
|
|
utime $t, $t, $messagefn;
|
|
}
|
|
}
|
|
|
|
# Because we opened the Mbox file without any
|
|
# variable, I think this means that we have its
|
|
# current line in Perl's default variable "$_".
|
|
# So all sorts of pattern matching magic works
|
|
# directly on it.
|
|
# We are currently reading the first line starting with
|
|
# "From " which contains the date we want.
|
|
#
|
|
# This will be of the form:
|
|
#
|
|
# From dduck@test.org Wed Nov 24 11:05:35 1999
|
|
#
|
|
# at least with UW-IMAP.
|
|
#
|
|
# However, I did find a nasty exception to this in my
|
|
# tests, of the form:
|
|
#
|
|
# "bounce-MusicNewsletter 5-rw=test.org"@announce2.mp3.com
|
|
#
|
|
# This makes it trickier to get rid of the email address,
|
|
# but I did find a way. I can't rule out that there would
|
|
# be some address like this with an "@" in the quoted
|
|
# portion too.
|
|
#
|
|
# Unfortunately, testing with an old Inbox Mbox file,
|
|
# I also found an instance where the email address
|
|
# had no @ sign at all. It was just an email
|
|
# account name, with no host.
|
|
#
|
|
# I could search for the day of the week. If I skipped
|
|
# at least one word of non-whitespace (1 or more contiguous
|
|
# non-whitespace characters) then searched for a day of
|
|
# the week, then I should be able to avoid almost
|
|
# every instance of a day of the week appearing in
|
|
# the email address.
|
|
#
|
|
# Do I need a failsafe arrangement to provide some
|
|
# other date to touch if I don't get what seems like
|
|
# a date in my resulting string? For now, no.
|
|
#
|
|
# I will take one approach if there is an @ in the
|
|
# "From " line and another (just skip the first word
|
|
# after "From ") if there is no @ in the line.
|
|
#
|
|
# If I knew more about Perl I would probably do it in
|
|
# a more elegant way.
|
|
# Copy the current line into $fromline.
|
|
$fromline = $_;
|
|
|
|
# Now get rid of the "From ". " =~ s" means substitute.
|
|
# Find the word "From " at the start of the line and
|
|
# replace it with nothing. The nothing is what is
|
|
# between the second and third slash.
|
|
$fromline =~ s/^From // ;
|
|
|
|
# Likewise get rid of the email address.
|
|
# This first section is if we determine there is one
|
|
# (or more . . . ) "@" characters in the line, which
|
|
# would normally be the case.
|
|
if ($fromline =~ m/@/)
|
|
{
|
|
# The line has at least one "@" in it, so we assume
|
|
# this is in the middle of an email address.
|
|
#
|
|
# If the email address had no spaces, then we could
|
|
# get rid of the whole thing by searching for any number
|
|
# of non-whitespace characters (\S) contiguously, and
|
|
# then I think a space. Subsitute nothing for this.
|
|
#
|
|
# $fromline =~ s/(\S)+ // ;
|
|
#
|
|
# But we need something to match any number of non-@
|
|
# characters, then the "@" and then all the non-whitespace
|
|
# characters from there (which takes us to the end of
|
|
# "test.org") and then the space following that.
|
|
#
|
|
# A tutorial on regular expressions is:
|
|
#
|
|
# http://www.perldoc.com/perl5.6.1/pod/perlretut.html
|
|
#
|
|
# Get rid of all non-@ characters up to the first "@":
|
|
$fromline =~ s/[^@]+//;
|
|
|
|
# Get rid of the "@".
|
|
$fromline =~ s/@//;
|
|
}
|
|
|
|
# If there was an "@" in the line, then we have now
|
|
# removed the first one (lets hope there aren't more!)
|
|
# and everything which preceded it.
|
|
#
|
|
# we now remove either something like
|
|
# '(foo bar)'. eg. '(no mail address)',
|
|
# or everything after the '@' up to the trailing
|
|
# timezone
|
|
|
|
# FIXME: all those regexp should be combined to just one single one
|
|
$fromline =~ s/(\((\S*| )+\)|\S+) *//;
|
|
chomp $fromline;
|
|
|
|
# Stash the date-time for later use. We will use it
|
|
# to touch the file after we have closed it.
|
|
$receivedate = $fromline;
|
|
|
|
# End of handling the "From " line.
|
|
}
|
|
|
|
close(STDERR);
|
|
open(STDERR, ">&OLDERR");
|
|
close(OLDERR);
|
|
|
|
# Now process header lines which are not the "From " line.
|
|
|
|
if ( ($inheaders eq 1)
|
|
&& (! /^From /)
|
|
)
|
|
{
|
|
# Now we are reading the header lines after the "From " line.
|
|
# Keep looking for the blank line which indicates the end of the
|
|
# headers.
|
|
|
|
|
|
# ".=" means append the current line to the $headers
|
|
# variable.
|
|
#
|
|
# For some reason, I was getting two blank lines
|
|
# at the end of the headers, rather than one,
|
|
# so I decided not to read in the blank line
|
|
# which terminates the headers.
|
|
#
|
|
# Delete the "unless ($_ eq "\n")" to get rid
|
|
# of this kludge.
|
|
|
|
$headers .= $_ unless ($_ eq "\n");
|
|
|
|
# Now scan the line for various status flags
|
|
# and to fine the Subject line.
|
|
|
|
$flags .= $1 if /^Status: ([A-Z]+)/;
|
|
$flags .= $1 if /^X-Status: ([A-Z]+)/;
|
|
if (/^X-Mozilla-Status: ([0-9a-f]{4})/i)
|
|
{
|
|
$flags .= 'R' if (hex($1) & 0x0001);
|
|
$flags .= 'A' if (hex($1) & 0x0002);
|
|
$flags .= 'D' if (hex($1) & 0x0008);
|
|
}
|
|
if(/^X\-Evolution:\s+\w{8}\-(\w{4})/oi)
|
|
{
|
|
$b = pack("H4", $1); #pack it as 4 digit hex (0x0000)
|
|
$b = unpack("B32", $b); #unpack into bit string
|
|
|
|
# "usually" only the right most six bits are used
|
|
# however, I have come across a seventh bit in
|
|
# about 15 (out of 10,000) messages with this bit
|
|
# activated.
|
|
# I have not found any documentation in the source.
|
|
# If you find out what it does, please let me know.
|
|
|
|
# Notes:
|
|
# Evolution 1.4 does mark forwarded messages.
|
|
# The sixth bit is to denote an attachment
|
|
|
|
$flags .= 'A' if($b =~ /[01]{15}1/); #replied
|
|
$flags .= 'D' if($b =~ /[01]{14}1[01]{1}/); #deleted
|
|
$flags .= 'T' if($b =~ /[01]{13}1[01]{2}/); #draft
|
|
$flags .= 'F' if($b =~ /[01]{12}1[01]{3}/); #flagged
|
|
$flags .= 'R' if($b =~ /[01]{11}1[01]{4}/); #seen/read
|
|
}
|
|
$subject = $1 if /^Subject: (.*)$/;
|
|
if ($use_cl eq 1)
|
|
{
|
|
$contentlength = $1 if /^Content-Length: (\d+)$/;
|
|
}
|
|
|
|
# Now look out for the end of the headers - a blank
|
|
# line. When we find it, create the file name and
|
|
# analyse the Subject line.
|
|
|
|
if ($_ eq "\n")
|
|
{
|
|
# We are at the end of the headers. Set the
|
|
# $inheaders flag back to 0.
|
|
|
|
$inheaders = 0;
|
|
|
|
# Include the current newline in the content length
|
|
|
|
++$contentlength if defined $contentlength;
|
|
|
|
# Create the file name for the current message.
|
|
#
|
|
# A simple version of this would be:
|
|
#
|
|
# $messagefn = "cur/$unique.$messagecount.mbox:2,";
|
|
#
|
|
# This would create names with $messagecount values of
|
|
# 1, 2, etc. But for neatness when looking at a
|
|
# directory of such messages, sorted by filename,
|
|
# I want to have leading zeroes on message count, so
|
|
# that they would be 000001 etc. This makes them
|
|
# appear in message order rather than 1 being after
|
|
# 19 etc. So this is good for up to 999,999 messages
|
|
# in a mailbox. It is a cosmetic matter for a person
|
|
# looking into the Maildir directory manually.
|
|
# To do this, use sprintf instead with "%06d" for
|
|
# 6 characters of zero-padding:
|
|
|
|
$messagefn = sprintf ("cur/%d.%06d.mbox:2,", $unique, $messagecount) ;
|
|
|
|
|
|
# Append flag characters to the end of the
|
|
# filename, according to flag characters
|
|
# collected from the message headers
|
|
|
|
$messagefn .= 'F' if $flags =~ /F/; # Flagged.
|
|
$messagefn .= 'R' if $flags =~ /A/; # Replied to.
|
|
$messagefn .= 'S' if $flags =~ /R/; # Seen or Read.
|
|
$messagefn .= 'T' if $flags =~ /D/; # Tagged for deletion.
|
|
|
|
|
|
# Opens filename $messagefn for output (>) with filehandle OUT.
|
|
|
|
open(OUT, ">$messagefn") or die("Fatal: unable to create new message $messagefn");
|
|
|
|
# Count the messages.
|
|
|
|
$messagecount++;
|
|
|
|
# Only for the first message,
|
|
# check to see if it is a dummy.
|
|
# Delete the message file we
|
|
# just created if it was for the
|
|
# dummy message at the start
|
|
# of the Mbox.
|
|
#
|
|
# Add search terms as required.
|
|
# The last 2 lines are for rent.
|
|
#
|
|
# "m" means match the regular expression,
|
|
# but we can do without it.
|
|
#
|
|
# Do I need to escape the ' in "DON'T"?
|
|
# I didn't in the original version.
|
|
|
|
if ( (($messagecount == 1) && defined($subject))
|
|
&& ($subject =~ m/^DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA/)
|
|
)
|
|
{
|
|
# Stash the file name of the dummy message so we
|
|
# can delete it later.
|
|
|
|
$deletedummy = "$messagefn";
|
|
}
|
|
|
|
# Print the collected headers to the message file.
|
|
|
|
print OUT "$headers";
|
|
|
|
|
|
# Clear $headers and $flags ready for the next message.
|
|
|
|
$headers = '';
|
|
$flags = '';
|
|
|
|
# End of processing the headers once we found the
|
|
# blank line which terminated them
|
|
}
|
|
|
|
# End of dealing with the headers.
|
|
}
|
|
|
|
|
|
if ( $inheaders eq 0)
|
|
{
|
|
|
|
# We are now processing the message body.
|
|
#
|
|
# Now we have passed the headers to the
|
|
# output file, we scan until the while
|
|
# loop finds another "From " line.
|
|
|
|
# Decrement our content length if we're
|
|
# using it to find the end of the message
|
|
# body
|
|
|
|
if (defined $contentlength) {
|
|
|
|
# Decrement our $contentlength variable
|
|
|
|
$contentlength -= length($_);
|
|
|
|
# The proper end for a message with Content-Length
|
|
# specified is the $contentlength variable should
|
|
# be exactly -1 and we should be on a bare
|
|
# newline. Note that the bare newline is not
|
|
# printed to the end of the current message as
|
|
# it's actually a message separator in the mbox
|
|
# format rather than part of the message. The
|
|
# next line _should_ be a From_ line, but just in
|
|
# case the Content-Length header is incorrect
|
|
# (e.g. a corrupt mailbox), we just continue
|
|
# putting lines into the current message until we
|
|
# see the next From_ line.
|
|
|
|
if ($contentlength < 0) {
|
|
if ($contentlength == -1 && $_ eq "\n") {
|
|
$contentlength = undef;
|
|
next;
|
|
}
|
|
$contentlength = undef;
|
|
}
|
|
}
|
|
|
|
#
|
|
# We want to copy every part of the message
|
|
# body to the output file, except for the
|
|
# quoted ">From " lines, which was the
|
|
# way the IMAP server encoded body lines
|
|
# starting with "From ".
|
|
#
|
|
# Pattern matching Perl majik to
|
|
# get rid of an Mbox quoted From.
|
|
#
|
|
# This works on the default variable "$_" which
|
|
# contains the text from the Mbox mailbox - I
|
|
# guess this is the case because of our
|
|
# (open(MBOX ....) line above, which did not
|
|
# assign this to anything else, so it would go
|
|
# to the default variable. This enables
|
|
# inscrutably terse Perlisms to follow.
|
|
#
|
|
# "s" means "Subsitute" and it looks for any
|
|
# occurrence of ">From" starting at the start
|
|
# of the line. When it finds this, it replaces
|
|
# it with "From".
|
|
#
|
|
# So this finds all instances in the Mbox message
|
|
# where the original line started with the word
|
|
# "From" but was converted to ">From" in order to
|
|
# not be mistaken for the "From ..." line which
|
|
# is used to demark each message in the Mbox.
|
|
# This was was a destructive conversion because
|
|
# any message which originally had ">From" at the
|
|
# start of the line, before being put into the
|
|
# Mbox, will now have that line without the ">".
|
|
|
|
s/^>From /From /;
|
|
|
|
# Glorious tersness here. Thanks Simon for
|
|
# explaining this.
|
|
#
|
|
# "print OUT" means print the default variable to
|
|
# the file of file handle OUT. This is where
|
|
# the bulk of the message text is written to
|
|
# the output file.
|
|
|
|
print OUT or die("Fatal: unable to write to new message to $messagefn");
|
|
|
|
|
|
# End of the if statement dealing with message body.
|
|
}
|
|
|
|
$previous_line_was_empty = ( $_ eq "\n" );
|
|
|
|
# End of while (MBOX) loop.
|
|
}
|
|
# Close the input file.
|
|
|
|
close(MBOX);
|
|
|
|
# Close the output file, and duplicate the code
|
|
# from the start of the while loop which touches
|
|
# the date-time of the most recent message file.
|
|
|
|
close(OUT);
|
|
if ($messagefn ne '') {
|
|
my $t = str2time($receivedate);
|
|
utime $t, $t, $messagefn;
|
|
}
|
|
|
|
# After all the messages have been
|
|
# converted, check to see if the
|
|
# first one was a dummy.
|
|
# if so, delete it and make
|
|
# the message count one less.
|
|
|
|
if ($deletedummy ne "")
|
|
{
|
|
printf("Dummy mail system first message detected and not saved.\n") if ($DEBUG);
|
|
unlink $deletedummy;
|
|
|
|
$messagecount--;
|
|
|
|
}
|
|
|
|
printf("$messagecount messages.\n\n") if ($DEBUG);
|
|
}
|