dot rc

Sign in or create your account | Project List | Help

dot rc Commit Details

Date:2008-12-30 14:04:53 (3 years 1 month ago)
Author:Luciano M. F. Rocha
Commit:8f1c268dc583b701f6c2a1a57b338bdbf0611fd2
Message:using external GetLive.pl

Files: opt/noarch/GetLive.pl (1 diff)

Change Details

opt/noarch/GetLive.pl
1#!/usr/bin/perl -w
2
3########################################################################################################################
4#
5# GetLive - perl script to get mail from hotmail (live) mailboxes.
6#
7# $Id: GetLive.pl,v 1.43 2008/07/05 19:55:41 jdla Exp $
8# $Name: Release_0_57 $
9#
10# Copyright (C) 2007 Jos De Laender <jos.de_laender@pandora.be>
11#
12# This work is inspired and partly reuses code from
13# gotmail :
14# Copyright (C) 2000-2003 Peter Hawkins <peterhawkins@ozemail.com.au>
15# Copyright (C) 2005 Jon Phillips <jon@rejon.org>
16# Copyright (C) 2005 Michael Ziegler.
17# Copyright (C) 2005-2006 Jos De Laender <jos.de_laender@pandora.be>
18#
19# This program is free software; you can redistribute it and/or modify
20# it under the terms of the GNU General Public License as published by
21# the Free Software Foundation; either version 2 of the License, or
22# (at your option) any later version
23#
24# This program is distributed in the hope that it will be useful,
25# but WITHOUT ANY WARRANTY; without even the implied warranty of
26# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27# GNU General Public License for more details.
28#
29# You should have received a copy of the GNU General Public License
30# along with this program; if not, write to the Free Software
31# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32#
33########################################################################################################################
34
35use strict;
36use File::Spec;
37use URI::Escape;
38
39########################################################################################################################
40#
41# XXX
42# XXX This is inserted to cope with French (accented) characters in the folder names.
43# XXX Not too sure about it. It works on my LANG=nl_BE.UTF-8 box, but I'm afraid it may break on other boxes ...
44# XXX In my case 'use encoding("UTF-8")' also worked (it's my locale after all).
45# XXX
46#
47########################################################################################################################
48
49eval "use encoding(\":locale\");";
50
51########################################################################################################################
52#
53# Global constants and variables.
54#
55########################################################################################################################
56
57my $ProgramName = "GetLive";
58my $Revision = '$Revision: 1.43 $'; # Meant for RCS.
59
60# Constants of configuration.
61my $Proxy = "";
62my $ProxyAuth = "";
63my $Login = "";
64my $Password = "";
65my $Domain = 'hotmail.com';
66my $CurlCommand = 'curl -k';
67my $Verbosity = 1; # 0:Silent; 1:Normal; 2:Verbose; 10:debug; 100:heavy debug
68my $MailProcessor = '/usr/bin/procmail'; # Any program taking mbox formatted at stdin will do.
69my $DownloadedIdsFile = ""; # Local file with Ids of already downloaded messages.
70my $RetryLimit = 2;
71my $MarkRead = "No"; # No,Yes : But never when downloaded before !
72my $Delete = "No"; # No,Yes : But never when downloaded before !
73my $FetchOnlyUnread = "No"; # If Yes, only messages marked unread are downloaded.
74                                                    # Unlike gotmail, this is completely orthogonal to the
75                                                    # DownloadedIdsFile, i.e. it is the one or the other.
76my $MoveToFolder = ""; # The name of the folder to move to after the download. "" is not.
77                                                    # If it begins with @ it is reference to a filename that
78                                                    # contains the folder to move to. This is a hook for
79                                                    # autoclassifying the mail on the server, including spam filtering.
80
81# Quirk. MS generates unterminated <pre> tags. Hopefully this is a temporary weakness on their side.
82# ("Hope makes living", as we say in Dutch ...)
83# To accept such messages, set this to 1. As of 8/9/2007 it must be accepted, otherwise the
84# message cannot be downloaded.
85
86my $AllowUncompletePreTag = 1;
87
88# Yet another quirk. See bug 1875392. We'll call it the DarrenQuirk.
89my $CorrectDarrenQuirk = 1;
90
91# Files in a temporary directory.
92my $TmpDir = File::Spec->tmpdir() . "/$ProgramName.$$";
93my $TmpCurlHeadersFile = "$TmpDir/Headers";
94my $TmpCookiesFile = "$TmpDir/Cookies";
95my $TmpFormDataFile = "$TmpDir/Form";
96my $TmpCurlStderrFile = "$TmpDir/CurlStderr";
97my $TmpCurlStdoutFile = "$TmpDir/CurlStdout";
98my $TmpCurlTraceFile = "$TmpDir/CurlTrace";
99
100# Messages retrieved from a folder.
101my $NrMessagesDetected = 0;
102my $NrMessagesUnread = 0;
103my @MessagesFrom = ();
104my @MessagesSubject = ();
105my @MessagesId = ();
106my @MessagesAd = ();
107my @MessagesRead = ();
108
109# Various variables.
110my $BaseUrl; # The one in the logged in screen used for fetching folders.
111my $NParameter;
112
113my @FolderHrefs = (); # The Hrefs found for the different folders.
114my @FolderIds = (); # The Ids found for the different folders.
115my @FolderNames = (); # The names found for the different folders.
116my @FolderNrMessages = (); # The number of messages found for the different folders.
117my $NrFolders = 0; # The number of folders found.
118
119my %FoldersToProcess = (); # The folders to process (empty will be considered as all). Otherwise FolderName=>1 assoc.
120
121my $CurlRun = 0; # Increased with each Curl run. Basically for debug reasons.
122my $ConfigFile;
123
124my $RequestHandler = "";
125my $SessionId = "";
126my $AuthUser = "";
127my $TrashFolderId = "";
128
129
130########################################################################################################################
131#
132# Catchall signal handler. Just observes death and cleans up the mess.
133#
134########################################################################################################################
135
# Handler for SIGINT / SIGTERM. Perl passes the signal name as first argument.
# Reports it, removes the temporary files and exits with status 1.
$SIG{INT} = $SIG{TERM} = sub {
  my ($SignalName) = @_;
  print STDERR "$ProgramName died with message: '$SignalName'.";
  CleanTempFiles();
  exit(1);
};

# Handler for die(). Perl passes the die message as first argument.
# Reports it, removes the temporary files and exits with status 1.
$SIG{__DIE__} = sub {
  my ($Text) = @_;
  # $^S is true while executing inside an eval and undefined while code is
  # still being compiled. In both cases the exception is not (yet) fatal for
  # the program : return and let die() proceed so the enclosing eval can
  # catch it, instead of terminating the whole program here.
  return if (!defined($^S) || $^S);
  print STDERR "$ProgramName died with message: '$Text'.";
  CleanTempFiles();
  exit(1);
};
142
143########################################################################################################################
144#
145# Display some text.
146# First parameter : text to be displayed.
147# Then a number of named parameters that are optional.
148# See %args.
149#
150########################################################################################################################
151
# Display : print a text, subject to the verbosity setting.
#   First argument  : the text to print.
#   Named arguments : stderr       => true sends the text to STDERR, always.
#                     MinVerbosity => text is printed on STDOUT only when
#                                     $Verbosity is at least this value.
# Returns nothing.
sub Display($%) {
  my ($Message,@Named) = @_;
  my %Options = (MinVerbosity => 0,
                 stderr       => 0,
                 @Named);

  if ($Options{'stderr'}) {
    # Messages for stderr are never filtered on verbosity.
    print STDERR $Message;
  } elsif ($Options{'MinVerbosity'} <= $Verbosity) {
    # Switch STDOUT to unbuffered before printing, so that the output
    # appears immediately; the previously selected handle is restored.
    my $PreviousHandle = select(STDOUT);
    $| = 1;
    select($PreviousHandle);

    print STDOUT $Message;
  }

  return;
}
177
178########################################################################################################################
179#
180# Display the introduction text.
181# Text as argument, stderr as optional named argument to redirect to stderr.
182#
183########################################################################################################################
184
# DisplayIntroText : show the banner (program name, revision, license notice,
# release tags, current time and user).
#   Named arguments : passed on to Display ; defaults are stderr => 0 and
#                     MinVerbosity => 1.
sub DisplayIntroText(%) {
  my %Options = (stderr       => 0,
                 MinVerbosity => 1,
                 @_);
  my $Now = localtime(time);
  my @BannerLines = (
    "",
    "$ProgramName $Revision Copyright (C)2007 Jos De Laender.",
    "$ProgramName comes with ABSOLUTELY NO WARRANTY.",
    "This is free software, and you are welcome to redistribute it",
    "under certain conditions; see the file License for details.",
    '$Name: Release_0_57 $',
    '$Id: GetLive.pl,v 1.43 2008/07/05 19:55:41 jdla Exp $',
    "Running at $Now for user $Login.",
  );
  # Every line, the leading empty one included, is terminated by a newline.
  Display(join("\n",@BannerLines)."\n",%Options);
}
200
201########################################################################################################################
202#
203# This is only called in error conditions. Output will go to stderr.
204#
205########################################################################################################################
206
# DisplayUsageAndExit : print the banner and the usage line on stderr,
# then terminate the program with exit status 1.
sub DisplayUsageAndExit() {
  my $UsageLine = "Usage: $ProgramName --config-file ConfigFile [--verbosity -1..100]\n";
  DisplayIntroText(stderr => 1);
  Display($UsageLine,stderr => 1);
  exit(1);
}
212
213########################################################################################################################
214#
215# Parse the command line
216#
217########################################################################################################################
218
# ParseArgs : interpret @ARGV (which is left untouched).
#   --config-file FILE : mandatory, stored in $ConfigFile.
#   --verbosity N      : optional integer (may be negative, the usage text
#                        advertises -1..100), stored in $Verbosity.
# Option names are matched case insensitively. Anything else, a repeated
# option included, is reported on stderr followed by the usage text and exit.
sub ParseArgs() {
  my @Pending        = @ARGV;
  my @Unknown        = ();
  my $SeenConfigFile = 0;
  my $SeenVerbosity  = 0;

  # Walk the argument list element by element, so that a configuration file
  # name may contain any character (spaces included).
  while (@Pending) {
    my $Arg = shift @Pending;
    if (!$SeenConfigFile && @Pending && $Arg =~ m/^--config-file$/i) {
      $ConfigFile     = shift @Pending;
      $SeenConfigFile = 1;
    } elsif (!$SeenVerbosity && @Pending && $Arg =~ m/^--verbosity$/i && $Pending[0] =~ m/^-?\d+$/) {
      $Verbosity     = shift @Pending;
      $SeenVerbosity = 1;
    } else {
      push @Unknown,$Arg;
    }
  }

  # --config-file is a mandatory argument.
  DisplayUsageAndExit() unless $SeenConfigFile;

  # Should have no other arguments.
  if (@Unknown) {
    my $Leftover = join(" ",@Unknown);
    Display("Wrong command line arguments '$Leftover'.\n",stderr => 1);
    DisplayUsageAndExit();
  }
}
241
242########################################################################################################################
243#
244# Parse the Configuration File
245#
246########################################################################################################################
247
# ParseConfig : read $ConfigFile and fill in the configuration variables.
# Lines starting with '#' and blank lines are skipped. Every other line must
# look like 'Option = Value' ; option names are case insensitive.
# Any malformed line, unknown option or inconsistent combination of settings
# is reported on stderr, followed by the usage text and exit.
# Dies when the configuration file cannot be opened.
sub ParseConfig {

  open(my $Config,'<',$ConfigFile) || die "Configuration file '$ConfigFile' could not be opened : $!.";

  # Report a problem and stop the program.
  my $Fail = sub {
    my ($Message) = @_;
    Display($Message,stderr=>1);
    DisplayUsageAndExit();
  };

  # Options that simply store their value, keyed on the lowercased name.
  my %ScalarOptions = (
    username        => \$Login,
    password        => \$Password,
    domain          => \$Domain,
    proxy           => \$Proxy,
    proxyauth       => \$ProxyAuth,
    downloaded      => \$DownloadedIdsFile,
    retrylimit      => \$RetryLimit,
    processor       => \$MailProcessor,
    curlbin         => \$CurlCommand,
    fetchonlyunread => \$FetchOnlyUnread,
    markread        => \$MarkRead,
    delete          => \$Delete,
    movetofolder    => \$MoveToFolder,
  );

  # Parse the file
  while (my $RawLine = <$Config>) {
    next if ($RawLine =~ /^#/);    # Comment.
    next if ($RawLine =~ /^\s*$/); # Empty line.

    # Copy without line terminator, so diagnostics do not contain a newline
    # in the middle of the quoted line.
    (my $ShownLine = $RawLine) =~ s/[\r\n]+$//;

    # The option name, followed by the remaining of the line.
    if ($RawLine !~ m/^([a-zA-Z0-9_-]+)(.*)$/s) {
      $Fail->("Wrong configuration line : '$ShownLine'.\n");
    }
    my ($Option,$OptionValue) = ($1,$2);

    if ($OptionValue !~ m/\s*=\s*\S+/) {
      $Fail->("Wrong configuration line : '$ShownLine' (no value).\n");
    }
    # Remove equals sign and leading, trailing whitespace.
    $OptionValue =~ s/=//;
    $OptionValue =~ s/^\s+|\s+$//g;

    my $Key = lc $Option;
    if (exists $ScalarOptions{$Key}) {
      ${$ScalarOptions{$Key}} = $OptionValue;
    } elsif ($Key eq "folder") {
      # May occur several times ; folder names are stored lowercased.
      $FoldersToProcess{lc $OptionValue} = 1;
    } elsif ($Key eq "mode") {
      warn "\n'Mode = ...' in the config file is ignored.\nThis version works only for 'Live' mailboxes !\n";
    } else {
      $Fail->("Wrong configuration line : '$ShownLine' (unknown option).\n");
    }
  }
  close($Config);

  # Some sanitychecks.
  if ($Login eq "") {
    $Fail->("UserName should be specified in the configuration file.\n");
  }
  if ($Password eq "") {
    $Fail->("Password should be specified in the configuration file.\n");
  }
  if ($FetchOnlyUnread !~ m/^(No|Yes)$/i) {
    $Fail->("FetchOnlyUnread should take No or Yes as argument in the configuration file.\n");
  }
  # FetchOnlyUnread and the file of downloaded ids exclude each other.
  if ( ($FetchOnlyUnread =~ m/^No$/i) && ($DownloadedIdsFile eq "") ) {
    $Fail->("Downloaded should be specified in the configuration file.\n");
  }
  if ( ($FetchOnlyUnread =~ m/^Yes$/i) && ($DownloadedIdsFile ne "") ) {
    $Fail->("Downloaded should not be specified in the configuration file when FetchOnlyUnread=Yes.\n");
  }
  if ($MarkRead !~ m/^(No|Yes)$/i) {
    $Fail->("MarkRead should take No or Yes as argument in the configuration file.\n");
  }
  if ($Delete !~ m/^(No|Yes)$/i) {
    $Fail->("Delete should take No or Yes as argument in the configuration file.\n");
  }
  if (($Delete =~ m/^Yes$/i) && ($MoveToFolder ne "")) {
    $Fail->("Delete must be 'No' when MoveToFolder is also specified in the configuration file.\n");
  }
}
344
345########################################################################################################################
346#
347# Clean up any temporary files which are collected in a temporary directory.
348#
349########################################################################################################################
350
# CleanTempFiles : remove every file in $TmpDir and then $TmpDir itself.
# Does nothing when $Verbosity is above 9 (files are kept for debugging) or
# when $TmpDir does not exist. Files or a directory that cannot be removed
# give a warning only ; a directory that cannot be opened is fatal.
sub CleanTempFiles() {
  return if ($Verbosity > 9); # Considered debug mode and thus keep the files !
  return if (! -e $TmpDir);   # We're even not at the point that the tmpdir exists ...

  opendir(my $Dir,$TmpDir) || die "Could not open '$TmpDir' : $!.";
  # The explicit defined() keeps the loop going for an entry named "0".
  while (defined(my $FileName = readdir($Dir))) {
    next if ($FileName eq "." || $FileName eq ".."); # Not the . nor .. directory
    unlink("$TmpDir/$FileName") || warn "Could not unlink $TmpDir/$FileName : $!";
  }
  closedir($Dir);

  # Finally get rid of the temporary directory itself.
  rmdir($TmpDir) || warn "Could not unlink $TmpDir : $!";
}
364
365########################################################################################################################
366#
367# Unescape html characters, widechars become blank along the conversion.
368#
369# Based on a function with copyright: Bryant H. McGill - 11c Lower Dorset Street, Dublin 1, Ireland
370# Use Terms: Free for non-commercial use, commercial use with notification.
371#
372########################################################################################################################
373
# HtmlUnescape : replace html character references in a string.
#   Argument : the string to convert.
#   Returns  : the converted string.
# Handled are the named entities amp, quot, gt, lt and nbsp (case
# insensitive) and decimal (&#65;) or hexadecimal (&#x41;) references.
# Numeric references above 255 (widechars) are dropped. Anything else that
# looks like an entity is left exactly as it was, and a stray '&' followed
# later on by a ';' is no longer mistaken for an entity.
sub HtmlUnescape($) {
  my $String = shift;

  my %Named = (amp  => "&",
               quot => '"',
               gt   => ">",
               lt   => "<",
               nbsp => " ");

  # Translate the text between '&' and ';' of one candidate entity.
  my $Decode = sub {
    my ($Entity) = @_;
    my $Code;
    if ($Entity =~ m/^\#(\d+)$/) {
      $Code = $1;
    } elsif ($Entity =~ m/^\#x([0-9a-f]+)$/i) {
      $Code = hex($1);
    }
    if (defined $Code) {
      return ($Code > 255) ? "" : chr($Code);
    }
    my $Name = lc $Entity;
    return exists($Named{$Name}) ? $Named{$Name} : "&$Entity;";
  };

  $String =~ s/&(\#?\w+);/$Decode->($1)/ge;
  return $String;
}
389
390########################################################################################################################
391#
392# Get a html page, basically via curl.
393# Returns the page as one big string.
394# Returns a second string with the latest url.
395# The parameters should be reasonably clear. FollowForward will follow a redirection.
396#
397########################################################################################################################
398
# GetPage : fetch one url by running the external curl command.
# Named arguments :
#   Url           : the url to fetch.
#   CurlDataArg   : when not "", handed to curl as its --data argument.
#   FollowForward : when true, a 'Location:' header in the answer is followed
#                   by calling GetPage again for the new url.
# Returns a list of two strings : the complete curl output (curl runs with -i,
# so the headers are included) and the url that was finally fetched.
# Dies on a 'nocookies' url, on a malformed or unexpected HTTP status, when a
# curl log file cannot be opened, or when no usable answer was obtained
# within $RetryLimit tries.
#
# NOTE(review): the url, the --data argument and the proxy settings are
# interpolated into a shell command line between double quotes. A url taken
# from a server answer that contains shell metacharacters would be expanded
# by the shell - confirm whether stricter quoting is needed here.
sub GetPage($%) {
  my %Args = (Url           => "",
              CurlDataArg   => "",
              FollowForward => 0,
              @_);
  my $Url           = $Args{'Url'};
  my $CurlDataArg   = $Args{'CurlDataArg'};
  my $FollowForward = $Args{'FollowForward'};

  die "'No Cookies Alarm' in '$Url'. Structure of hotmail changed ?" if ($Url =~ m/reason=nocookies/i);

  $CurlRun++;

  my $OptionsToCurl = "";
  $OptionsToCurl .= "--proxy $Proxy "          if ($Proxy);
  $OptionsToCurl .= "--proxy-user $ProxyAuth " if ($ProxyAuth);

  # The files with the Cookies.
  $OptionsToCurl .= "-b $TmpCookiesFile -c $TmpCookiesFile ";

  $OptionsToCurl .= "--data \"$CurlDataArg\" " if ($CurlDataArg ne "");

  # Curl is put silent (but with error output)
  # when not interactive or low verbosity.
  if ( (not -t STDOUT) || ($Verbosity <= 1) ) {
    $OptionsToCurl .= "-s -S ";
  }

  if ($Verbosity > 9) {
    $OptionsToCurl .= "-v --trace $TmpCurlTraceFile.$CurlRun";
  }

  # JDLA curl outputs info via stderr. Catched in file and appended
  # to stdout output in debug mode.
  my $CommandLine =
    "$CurlCommand --stderr $TmpCurlStderrFile.$CurlRun \"$Url\" " .
    "$OptionsToCurl -i -m 600 -D $TmpCurlHeadersFile.$CurlRun " .
    "-A \"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.5) Gecko/20061201 Firefox/2.0.0.5 (Ubuntu-feisty)\"";
  Display("Curl run $CurlRun.\nCommandLine : '$CommandLine'.\n", MinVerbosity => 10);

  # The escaped password is blanked out of the curl log. It is the same for
  # every line, so it is computed only once.
  my $PasswordToBlank = uri_escape($Password,"^A-Za-z");

  my $NrTries    = 0;
  my @CurlOutput = ();
  while (!@CurlOutput && $NrTries++ < $RetryLimit) {
    Display("Trying [$NrTries/$RetryLimit].\n",MinVerbosity => 2);
    @CurlOutput = `$CommandLine`;

    # Copy output. Only in very high debug levels. We have it in file anyway.
    if ($Verbosity > 99) { # The if around makes it a bit more efficient over the loop.
      foreach my $Line (@CurlOutput) {
        Display($Line,MinVerbosity => 100);
      }
    }

    # Copy curl stderr.
    open(my $CurlStderr,'<',"$TmpCurlStderrFile.$CurlRun") || die "Could not open $TmpCurlStderrFile.$CurlRun : $!.";
    Display("\nstderr of curl :\n",MinVerbosity => 10);
    while (my $Line = <$CurlStderr>) {
      if ($PasswordToBlank ne "") {
        $Line =~ s/\Q$PasswordToBlank\E/YouThinkThisIsThePassword/g;
      }
      Display("$Line",MinVerbosity => 10);
    }
    close($CurlStderr);
    Display("\nEnd of stderr of curl.\n",MinVerbosity => 10);

    # Curl produced nothing at all (for instance a connection failure).
    # There is no status line to inspect then : go for the next try.
    if (!@CurlOutput) {
      Display("No output received from curl.\n",MinVerbosity => 2);
      next;
    }

    # Some checking on the HTTP response to see if there's no 5** Server error or 4** Client error.
    # In general : 2** is Success, 3** is Redirection, 4** is Client Error and 5** is Server Error.
    if ($CurlOutput[0] !~ m/HTTP[^ ]+ (\d{3})/) {
      die("Irregular HTTP header '$CurlOutput[0]' received.");
    }
    my $HttpCode = $1;
    if ($HttpCode =~ m/(1|2|3)\d{2}/) {
      Display("Http Status OK : $HttpCode.\n",MinVerbosity=>2);
    } elsif ($HttpCode =~ m/4\d{2}/) {
      Display("Http Client Error : $HttpCode.\n",MinVerbosity=>2);
      @CurlOutput = (); # Force retry.
    } elsif ($HttpCode =~ m/5\d{2}/) {
      Display("Http Server Error : $HttpCode.\n",MinVerbosity=>2);
      @CurlOutput = (); # Force retry.
    } else {
      die("Unexpected HTTP status : '$HttpCode'.");
    }
  }

  # In debug mode (Verbosity>9) we copy the output to a file.
  if ($Verbosity > 9) {
    open(my $CurlStdout,'>',"$TmpCurlStdoutFile.$CurlRun") ||
      die "Could not open $TmpCurlStdoutFile.$CurlRun : $!.";
    print {$CurlStdout} @CurlOutput;
    close($CurlStdout);
  }

  # Still nothing usable after the last try.
  if (!@CurlOutput && $NrTries > $RetryLimit) {
    Display("Curl run $CurlRun.\nCommandLine : '$CommandLine'.\n",stderr => 1);
    die("An error was encountered getting the page.");
  }

  # Redirect search in headers : the first Location header wins.
  my $Redirection = "";
  open(my $CurlHeaders,'<',"$TmpCurlHeadersFile.$CurlRun") || die "Could not open $TmpCurlHeadersFile.$CurlRun : $!.";
  while (my $Header = <$CurlHeaders>) {
    if ($Header =~ m/^Location: (\S+)\s/) {
      $Redirection = $1;
      last;
    }
  }
  close($CurlHeaders);

  # If we have been asked to follow Location: headers
  if ($FollowForward && $Redirection ne "") {
    # A relative redirection is completed with scheme and host of the
    # current url, for http as well as for https urls.
    if ($Redirection !~ m/^http.*/i) {
      if ($Url =~ m/(https?:\/\/[^\/]+)\//i) {
        $Redirection = $1 . $Redirection;
      }
    }
    Display("Following redirect to $Redirection.\n",MinVerbosity => 2);
    return GetPage(Url => $Redirection,FollowForward => $FollowForward);
  }

  return (join("",@CurlOutput),$Url);
}
527
528########################################################################################################################
529#
530# Do the HotMail login process - log in until we have the URL of the inbox.
531#
532########################################################################################################################
533
# Login : log in on the web interface and record $BaseUrl and $NParameter.
# Takes no arguments. The steps are :
#   1. Fetch http://mail.live.com (following redirects) and the first external
#      javascript file that page refers to ; scrape the form fields from both.
#   2. Write the form to $TmpFormDataFile and post it to the scraped login url.
#   3. Follow the redirect found in the answer and look for markers of a
#      logged in page.
# Dies whenever an expected page element is missing or the login is refused.
534sub Login() {
535
536    Display("Getting hotmail index loginpage.\n", MinVerbosity =>2);
537
538  my ($LoginPageAsString,$GetPageUrl) = GetPage(Url => "http://mail.live.com",FollowForward => 1);
539
540  # We expect here a number of functions now (aug 2007) to be hidden in a javascript
541  # that is loaded separately. Let's load and append.
542  # XXX JDLA It can turn out that after all we don't use anything of it, but reconstruct.
543  # Then one can speed up by leaving this JSPageAsString out.
544
545  my $BaseHref = "";
546  if ($LoginPageAsString =~ m/<base\s+href=\"([^\"]+)\"/) {
547    $BaseHref = $1;
548    Display("Found base href to be '$BaseHref'.\n",MinVerbosity => 10);
549  }
550
551  my $JavaScriptHref = "";
552  if ($LoginPageAsString =~ m/<script\s+type=\"text\/javascript\"\s+src=\"([^\"]+)\"/ ) {
553    $JavaScriptHref = $1;
554    Display("Found javascript href to be '$JavaScriptHref'.\n",MinVerbosity => 10);
555  }
556
557  die "Expected javascript href at this stage." unless $JavaScriptHref;
558
559  Display("Fetching the JS href.\n",MinVerbosity => 10);
560  my ($JSPageAsString,$JSGetPageUrl) = GetPage(Url => "${BaseHref}$JavaScriptHref",FollowForward => 1);
561
562  # Append the JS stuff into our page.
563
564  $LoginPageAsString .= $JSPageAsString;
565
566  # We would look to :
567  #
568  # function FormStart(){var s="
569  # <form name=\"f1\" method=\"POST\" target=\"_top\" action=\""+g_urlPost+"\"
570  # onsubmit=\"return WLSubmit(this)\">";
571  # s+=WL_HiddenField("idsbho","IDSBHO","1");
572  # s+=WL_HiddenField("PwdPad","i0340",null);
573  # s+=WL_HiddenField("LoginOptions","LoginOptions","3");
574  # s+=WL_HiddenField("CS","CS",null);
575  # s+=WL_HiddenField("FedState","FedState",null);
576  # s+=WL_HiddenField("PPSX","i0326",g_sRBlob);
577  # s+=WL_HiddenField("type","type",null);return s;}
578  #
579  # The WL_HiddenField = 'name','identifier','value'. Identifier unimportant.
580  # But assume g_urlPost is always there. So action is g_urlPost --> srf_uPost
581  # Assume also g_sRBlob -->srf_sRBlob
582
583
584  # FormStart as in the analysis described above.
585  if ($LoginPageAsString !~ m/function FormStart\(\)\s*\{([^\}]+)\}/i) {
586    die "Page doesn't contain FormStart as expected.";
587  }
588  my $FormStart = $1;
589    Display("FormStart detected as '$FormStart'.\n", MinVerbosity => 10 );
590
591  # Hidden fields as described above.
# A quoted value is stored without its quotes ; the bare word null becomes "".
592  my %Fields = ();
593  while ($FormStart =~ m/s\+=WL_HiddenField\(\"([^\"]+)\",[^,]+,([^\)]+)\)/gc) {
594    my $Key = $1;
595    my $Val = $2;
596    if ($Val =~ m/\"([^\"]+)\"/) {
597      $Val = $1;
598    }
599    if ($Val eq "null") {
600      $Val = "";
601    }
602    $Fields{$Key} = $Val;
603    Display("Detected HiddenField : $Key->$Val\n", MinVerbosity => 10);
604  }
605
# Re-entry point. After a 'password is incorrect' answer the code below jumps
# back to here (once) and reads srf_uPost, srf_sRBlob and PPFT again, this time
# from the page that was received as answer to the post.
606REENTRY_DARREN_QUIRK:
607  # Assumed g_urlPost
608    if ($LoginPageAsString !~ m/var srf_uPost='(\S+)'/i) {
609        die "Page doesn't contain var srf_uPost in the expected place.";
610    }
611  my $LoginUrl = $1;
612    Display("LoginUrl detected as '$LoginUrl'.\n", MinVerbosity => 10 );
613
614  # Transformation of LoginUrl (mimic part of the g_DO in javascript ..).
# Only the host part is replaced, and only for the msn.com domain.
615  if ($LoginUrl !~ m/(http[s]?):\/\/([^\/]+)\/(.*)/ ) {
616    die "Malformed LoginUrl : '$LoginUrl'.";
617  }
618  my $ProtocolLoginUrl = $1;
619  my $FirstPartLoginUrl = $2;
620  my $SecondPartLoginUrl = $3;
621  if ($Domain eq "msn.com") {
622    $FirstPartLoginUrl = "msnia.login.live.com";
623  }
624  $LoginUrl = "$ProtocolLoginUrl://$FirstPartLoginUrl/$SecondPartLoginUrl";
625    Display("LoginUrl transformed as '$LoginUrl'.\n", MinVerbosity => 10 );
626
627  # Assumed g_sRBlob (and hence PPSX)
628    if ($LoginPageAsString !~ m/var srf_sRBlob='(\S+)'/i) {
629        die "Page doesn't contain var srf_sRBlob in the expected place.";
630    }
631    Display("PPSX detected as '$1'.\n", MinVerbosity => 10 );
632  $Fields{"PPSX"} = $1;
633
634  # PPFT is a normal (ie non JS) hidden input type.
# The value is the second capture group ; the first one is the optional id attribute.
635  if( $LoginPageAsString !~ m/<\s*input\s+.*name=\"PPFT\"(\s+id="\S+")?\s+value=\"(\S*)\"/ ) {
636    die "Page doesn't contain input field PPFT as expected.";
637    }
638    Display("PPFT detected : '$2'.\n",MinVerbosity => 10 );
639  $Fields{"PPFT"} = $2;
640
641  # A number of other assumptions that are peeled deep out of JS.
642  # I'm afraid that the need for an embedded JS interpreter is coming closer ...
643  $Fields{"type"} = "11";
644  $Fields{"NewUser"} = "1";
645  $Fields{"i1"} = "0";
646  $Fields{"i2"} = "0";
647
648  # Hope the password padding still works ...
# The pad is the start of $Padding, shortened by the length of the password.
# NOTE(review): for a password longer than $Padding the length argument of
# substr becomes negative, which substr interprets as 'leave that many
# characters off the end' - confirm such passwords cannot occur.
649    my $Padding = "BovenGentRijstEenzaamEnGrijsHetOudBelfort";
650    my $PwdPad = substr( $Padding, 0, length($Padding)-length($Password) );
651    Display("PwdPad constructed : '$PwdPad'.\n",MinVerbosity => 10 );
652  $Fields{"PwdPad"} = $PwdPad;
653
654  # Login and password. Everything but ascii letters is escaped.
655  $Fields{"login"} = uri_escape($Login . '@' . $Domain, "^A-Za-z");
656  $Fields{"passwd"} = uri_escape($Password, "^A-Za-z");
657
658
659  # Construct the form with above in a temporary file.
# NOTE(review): only login and passwd are escaped above ; the scraped field
# values are written as they were found - confirm that this is intended.
660  open (FORMFILE,">$TmpFormDataFile") || die "Could not open $TmpFormDataFile : $!.";
661  my $HaveAlreadyArgument = 0;
662  foreach my $Key (keys %Fields) {
663    if ($HaveAlreadyArgument) { print FORMFILE "\&"; }
664    print FORMFILE "$Key=$Fields{$Key}";
665    $HaveAlreadyArgument = 1;
666  }
667    close FORMFILE;
668
669  # Second step of login. The form is provided as a curl --data argument.
670    Display("Logging in.\n",MinVerbosity => 1);
671
672    ($LoginPageAsString,$GetPageUrl) = GetPage(Url => $LoginUrl,CurlDataArg => "\@$TmpFormDataFile",FollowForward => 1);
673  # XXX JDLA This is old and needs to be checked. But for the moment I
674  # don't care for bad password notices.
675    if ($LoginPageAsString =~ /password is incorrect/i) {
676    # Bug correction : Darren Quirk !
677    if ($CorrectDarrenQuirk) {
678      $CorrectDarrenQuirk = 0; # Avoid looping on *really* wrong password.
679      Display("Recycling for the 'Darren Quirk'.\n",MinVerbosity=>10);
680      goto REENTRY_DARREN_QUIRK
681    }
682        die("There was an error logging in. Please check that your username and password are correct.");
683    }
684
# The redirect target is looked for in a javascript location.replace call or,
# failing that, in a meta refresh tag. $1 holds the capture of whichever of
# the two patterns matched.
685    if ($LoginPageAsString !~ m/window\.location\.replace\(\"(.*)\"\);/i &&
686      $LoginPageAsString !~ m/<meta http-equiv=\"REFRESH\" content=\"0;\sURL=(.*)\"></i) {
687        die("Hotmail's login structure has changed! (redirloc).");
688  }
689    $LoginUrl = $1;
690
691  Display("LoginUrl 2 : '$LoginUrl'.\n",MinVerbosity => 10);
692  # Following the redirect : Third step of login.
693    Display("Following redirect.\n",MinVerbosity => 2);
694  ($LoginPageAsString,$GetPageUrl) = GetPage(Url => $LoginUrl,FollowForward => 1);
695
# From here on LoginUrl is the url that GetPage reported as finally fetched.
696  $LoginUrl = $GetPageUrl;
697
# BaseUrl is that url up to and including its last slash ; NParameter is its
# n=<digits> part when there is one, "" otherwise.
698  if ($LoginUrl !~ m/(http[s]?:\/\/([^\/]+\/)+)/) {
699    die "Could not detect BaseUrl.";
700  }
701  $BaseUrl = $1;
702  $NParameter = "";
703  if ($LoginUrl =~ m/(n=\d+)/) {
704    $NParameter = $1;
705  }
706  Display("LoginUrl : $LoginUrl.\n",MinVerbosity => 10);
707  Display("BaseUrl : $BaseUrl.\n",MinVerbosity => 10);
708  Display("NParameter : $NParameter.\n",MinVerbosity => 10);
709
710  # At this moment we assume we are logged in, but there should be some 'markers' to
711  # check this reasonably.
712
713  my $LoggedIn = 0;
714  if ($LoginPageAsString =~ m/href=\"ManageFoldersLight.aspx/) {
715    $LoggedIn = 1;
716  } elsif ($LoginPageAsString =~ m/MSNPlatform\/browsercompat.js/) {
717    $LoggedIn = 1;
718  }
719
720  die "Could not log in. Maybe structure has changes or was not foreseen." unless $LoggedIn;
721
722    Display("Got MainPage.\n",MinVerbosity => 1);
723}
724
725########################################################################################################################
726#
727# Search for Cookie in the CookiesFile.
728# Argument : The cookie to be found.
729# Returns its value.
730#
731########################################################################################################################
732
# FindCookie : look up a cookie in the cookie file that curl maintains
# ($TmpCookiesFile, tab separated, cookie name in the 6th and value in the
# 7th column).
#   Argument : the name of the cookie to be found.
#   Returns  : the value of the first cookie with that name, or "" when there
#              is none.
# Dies when the cookie file cannot be opened.
sub FindCookie($) {
  my ($CookieToFind) = @_;

  open(my $Cookies,'<',$TmpCookiesFile) || die "Could not open '$TmpCookiesFile' : $!.";

  my $Value = "";
  while (my $Line = <$Cookies>) {
    chomp $Line;
    # curl marks HttpOnly cookies with this prefix ; such a line is a cookie
    # and not a comment.
    $Line =~ s/^#HttpOnly_//;
    next if ($Line =~ m/^#/); # Comment
    next if ($Line =~ m/^$/); # Empty line.

    # The limit of 7 keeps an empty value (a trailing empty field) in place.
    my @Columns = split(/\t/,$Line,7);
    next if (@Columns < 7);   # Not a complete cookie line.

    if ($Columns[5] eq $CookieToFind) {
      $Value = $Columns[6];
      last;
    }
  }
  close($Cookies);

  return $Value;
}
749
750########################################################################################################################
751#
752# Move the email message to a folder.
753# MessageIdx and FolderName as argument.
754#
755########################################################################################################################
756
# MoveToFolder : move one message to another folder on the server.
#   Arguments : index of the message in @MessagesId / @MessagesAd,
#               name of the target folder (compared case insensitively),
#               index of the source folder in @FolderIds.
# Dies when no folder with the given name is known.
# The result of the GetPage call that performs the move is returned.
sub MoveToFolder($$$) {
  my ($MessageIdx,$TargetFolderName,$SourceFolderIdx) = @_;
  my $Id = $MessagesId[$MessageIdx];
  my $Ad = $MessagesAd[$MessageIdx];

  # Locate the first folder that carries the requested name.
  my $TargetFolderIdx;
  foreach my $Idx (0 .. $NrFolders-1) {
    if (lc($FolderNames[$Idx]) eq lc($TargetFolderName)) {
      $TargetFolderIdx = $Idx;
      last;
    }
  }

  # Let's die the hard way if we do not find that folder.
  die "Folder with name '$TargetFolderName' used in MoveToFolder could not be located." unless defined($TargetFolderIdx);

  Display("Moving email message to folder '$TargetFolderName'.\n",MinVerbosity => 1);

  my $Destination = $FolderIds[$TargetFolderIdx];
  my $Origin      = $FolderIds[$SourceFolderIdx];
  my $MT          = FindCookie("mt");

  my $RequestUrl = join("&",
    "${BaseUrl}$RequestHandler?cnmn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox.MoveMessages",
    "a=$SessionId",
    "au=$AuthUser");

  # The argument list of the remote MoveMessages call.
  my $CallArguments = qq("$Origin","$Destination",["$Id"],[{"$Ad$Origin",null}],null,null,0,false,Date,false,true);

  my $Form = join("&",
    "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox",
    "d=".uri_escape($CallArguments),
    "mn=MoveMessages",
    "mt=$MT",
    "v=1");

  # Do The move ...
  my ($AnswerAsString,$AnswerUrl) = GetPage(Url => $RequestUrl,CurlDataArg => $Form);
}
793
########################################################################################################################
#
# Delete an email message, which is done by moving it to the trash folder ($TrashFolderId).
# Arguments : MessageIdx (index in the global @MessagesId/@MessagesAd),
#             SourceFolderIdx (index in @FolderIds of the folder the message is in now).
#
########################################################################################################################

sub DeleteMessage($$) {
  my ($MessageIdx,$SourceFolderIdx) = @_;
  my ($MessageId,$MessageAd) = ($MessagesId[$MessageIdx],$MessagesAd[$MessageIdx]);

  Display("Deleting email message.\n",MinVerbosity => 1);

  my $FromBox = $FolderIds[$SourceFolderIdx];
  my $MT = FindCookie("mt");

  # Same request as for moving a message, with the trash folder as the target.
  my $Url = $BaseUrl.$RequestHandler.
            "?cnmn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox.MoveMessages".
            "&a=$SessionId".
            "&au=$AuthUser";
  my $MoveArguments =
    qq("$FromBox","$TrashFolderId",["$MessageId"],[{"$MessageAd$FromBox",null}],null,null,0,false,Date,false,true);
  my $PostData = join("&",
                      "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox",
                      "d=".uri_escape($MoveArguments),
                      "mn=MoveMessages",
                      "mt=$MT",
                      "v=1");

  # Do the delete ...
  my ($ReplyPageAsString,$ReplyPageUrl) = GetPage(Url => $Url,CurlDataArg => $PostData);
}
823
########################################################################################################################
#
# Mark an email message as read.
# Argument : MessageIdx (index in the global @MessagesId).
#
########################################################################################################################

sub MarkRead($) {
  my ($MessageIdx) = @_;
  my $MessageId = $MessagesId[$MessageIdx];

  Display("Marking email message as read.\n",MinVerbosity => 1);

  my $MT = FindCookie("mt");
  my $Url = $BaseUrl.$RequestHandler.
            "?cnmn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox.MarkMessages".
            "&a=$SessionId".
            "&au=$AuthUser";
  # The 'd' argument is already url escaped here : true,["<MessageId>"]
  my $PostData = join("&",
                      "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox",
                      "d=true%2C%5B%22$MessageId%22%5D",
                      "mn=MarkMessages",
                      "mt=$MT",
                      "v=1");

  # Mark as read ...
  my ($ReplyPageAsString,$ReplyPageUrl) = GetPage(Url => $Url,CurlDataArg => $PostData);
}
850
########################################################################################################################
#
# Return the email message (mbox format) as one big string.
# Arguments : MessageIdx (index in the global @MessagesId) and FolderName (only used for
#             the identifying X-...-Folder header).
# The message source is fetched, the content of its <pre> element is taken as the message,
# "From " lines in it are quoted as ">From " and an mbox "From address date" line plus some
# identifying X- headers are put in front of it.
# Dies if the <pre> element can not be found in the fetched page.
#
########################################################################################################################

sub GetEmail($$) {
  my ($MessageIdx,$FolderName) = @_;
  my $MessageId = $MessagesId[$MessageIdx];

  Display("Getting email message.\n",MinVerbosity => 1);

  my $Url = "${BaseUrl}GetMessageSource.aspx?msgid=$MessageId";
  my ($EmailPageAsString,$GetPageUrl) = GetPage(Url => $Url,FollowForward => 1);

  $EmailPageAsString =~ s/^[\s\n]*//;
  $EmailPageAsString = HtmlUnescape($EmailPageAsString); # Strips all HTML artifacts from the message body.
  $EmailPageAsString =~ s/\r\n/\n/gs; # Force unix line endings.

  if ($AllowUncompletePreTag == 0) {
    if ($EmailPageAsString !~ /<pre>[\s\n]*(.*?)<\/pre>/si) {
      die "Unable to download email message.";
    }
    $EmailPageAsString = $1;
  } else {
    # No closing </pre> required : take everything up to the last tag of the page.
    if ($EmailPageAsString !~ /<pre>[\s\n]*(.*?)<[^<]+$/si) {
      die "Unable to download email message.";
    }
    $EmailPageAsString = $1;
  }

  # Fallback envelope sender and date, case it would not be in the message.
  my $FromAddress = "$Login\@$Domain";
  my $FromDate = scalar gmtime;

  # Strip "From whoever" when found on the first line- the format is wrong for mbox files anyway.
  if ($EmailPageAsString =~ s/^From ([^ ]*) [^\n]*\n//s) {
    $FromAddress = $1;
  } elsif ($EmailPageAsString =~ m/^From:[^<]*<([^>]*)>/m) {
    $FromAddress = $1;
  }

  # Apply >From quoting. Only the start of the line is touched, so that the line keeps its
  # line ending and does not get glued to the next line.
  $EmailPageAsString =~ s/^From />From /gm;

  # If the message has a date on a header continuation line, use that one for the mbox header.
  if ($EmailPageAsString =~ m/^\t (\w+), (\d+) (\w+) (\d+) (\d+):(\d+):(\d+) ([+-]?.+)/m) {
    my ($DayOfWeek,$Day,$Month,$Year,$Hour,$Minute,$Second,$TimeZone) = ($1,$2,$3,$4,$5,$6,$7,$8);

    # Put date in mboxheader in UTC time
    my $UtcDate = _MboxDateInUtc($Day,$Month,$Year,$Hour,$Minute,$Second,$TimeZone);
    if (defined $UtcDate) {
      $FromDate = $UtcDate;
    } else {
      # Not convertible (unknown month, impossible date) : use the date as found in the message.
      $FromDate = sprintf ("%s %s %02d %02d:%02d:%02d %d",$DayOfWeek,$Month,$Day,$Hour,$Minute,$Second,$Year);
    }
  }

  # Add an mbox-compatible header
  # And add some identifying headers.
  $EmailPageAsString = "From $FromAddress $FromDate\n".
                       "X-${ProgramName}-Version: $Revision\n".
                       "X-${ProgramName}-Folder: $FolderName\n".
                       "X-${ProgramName}-User: $Login\n".
                       $EmailPageAsString;

  return $EmailPageAsString;
}

########################################################################################################################
#
# Helper of GetEmail : convert a date with time zone to the date text for the mbox header, in UTC.
# Arguments : Day, Month (english three letter name), Year, Hour, Minute, Second and
#             TimeZone ("+HHMM" or "-HHMM", anything else is taken as UTC).
# Returns a text like "Sat Jul 05 19:55:41 2008", or undef if the date can not be converted.
#
########################################################################################################################

sub _MboxDateInUtc {
  my ($Day,$MonthName,$Year,$Hour,$Minute,$Second,$TimeZone) = @_;

  my @MonthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
  my @DayNames = qw(Sun Mon Tue Wed Thu Fri Sat);

  my ($MonthIdx) = grep { lc($MonthNames[$_]) eq lc($MonthName) } 0 .. $#MonthNames;
  return unless defined $MonthIdx;

  # The zone is hours and minutes (+0530 is 5 hours and 30 minutes), not one number.
  my $OffsetInSeconds = 0;
  if ($TimeZone =~ m/^([+-]?)(\d\d)(\d\d)/) {
    $OffsetInSeconds = ($2*60 + $3)*60;
    $OffsetInSeconds = -$OffsetInSeconds if $1 eq "-";
  }

  # timegm dies on impossible dates, hence the eval.
  require Time::Local;
  my $LocalAsEpoch = eval { Time::Local::timegm($Second,$Minute,$Hour,$Day,$MonthIdx,$Year) };
  return unless defined $LocalAsEpoch;

  # Going through the epoch makes that day, month and year follow when the hour passes midnight.
  my @Utc = gmtime($LocalAsEpoch - $OffsetInSeconds);
  return sprintf ("%s %s %02d %02d:%02d:%02d %d",
                  $DayNames[$Utc[6]],$MonthNames[$Utc[4]],$Utc[3],$Utc[2],$Utc[1],$Utc[0],$Utc[5]+1900);
}
922
########################################################################################################################
#
# Get the list of messages from the folder with Idx as argument.
# Runs through the pages of the folder and fills the global arrays @MessagesRead,
# @MessagesFrom, @MessagesSubject, @MessagesId and @MessagesAd, as well as the counters
# $NrMessagesDetected and $NrMessagesUnread (both restarted from 0).
# As long as $RequestHandler is not known, the folder page itself is fetched and
# $TrashFolderId, $RequestHandler, $SessionId and $AuthUser are taken from it.
# Dies whenever a page can not be parsed as expected.
#
########################################################################################################################

sub GetMessagesFromFolder($) {
  my ($FolderIdx) = @_;
  my $FolderName = $FolderNames[$FolderIdx];
  my $FolderId = $FolderIds[$FolderIdx];
  my $ReportedNrMessages = $FolderNrMessages[$FolderIdx];

  Display("Loading folder '$FolderName'.\n",MinVerbosity => 1);

  my $Page = 0;
  my $StillPageToGo = 1;

  # The pnAm and pnAd values found with the 'next page' link of the previous page.
  my $pnAm = "";
  my $pnAd = "";

  my $PageAsString;
  my $GetPageUrl;

  # Reinitialize the global variable back to 0.
  $NrMessagesDetected = 0;
  $NrMessagesUnread = 0;

  while ($StillPageToGo) {
    $StillPageToGo = 0;
    $Page++;

    Display("Handling page $Page.\n",MinVerbosity => 2);

    my $MT = FindCookie("mt");
    if ($RequestHandler) {
      my $Url = "${BaseUrl}$RequestHandler?cnmn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox.GetInboxData&".
                "a=$SessionId&".
                "au=$AuthUser&".
                "ptid=0";
      my $PostData = "";
      if ($Page == 1) {
        $PostData = "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox".
                    "&".
                    "mn=GetInboxData".
                    "&".
                    "d=true,true,{".uri_escape("\"$FolderId\"").",25,0,0,Date,false,null,null,".
                      "1,1,false,null,false,-1},false,null".
                    "&".
                    "v=1".
                    "&".
                    "mt=$MT";
      } else {
        $PostData = "cn=Microsoft.Msn.Hotmail.Ui.Fpp.MailBox".
                    "&".
                    "mn=GetInboxData".
                    "&".
                    "d=true,true,{".uri_escape("\"$FolderId\"").",25,NextPage,0,Date,false,".
                      uri_escape("\"$pnAm\"") . "," .
                      uri_escape("\"$pnAd\"") . "," .
                      "$Page,2,false,null,false,$ReportedNrMessages},false,null".
                    "&".
                    "v=1".
                    "&".
                    "mt=$MT";
      }

      ($PageAsString,$GetPageUrl) = GetPage(Url => $Url,CurlDataArg => $PostData);

      # XXX JDLA ???
      # For God knows which reason all of the " are now suddenly \" in the html output ...
      # Well in fact it is no html output, it is one big argument of a javascript. Let's do some substitutions to help.
      # NOTE(review): the first substitution is done twice - presumably for quotes that are escaped twice; confirm.
      $PageAsString =~ s/\\\"/\"/g;
      $PageAsString =~ s/\\\"/\"/g;
      $PageAsString =~ s/\\r/\r/g;
      $PageAsString =~ s/\\n/\n/g;
    } else {
      # First time : not yet 'RequestHandler'
      Display("Getting session id, request handler, and other data necessary for requests.\n", MinVerbosity =>2);
      my $PageUrl = $BaseUrl.$FolderHrefs[$FolderIdx];
      ($PageAsString,$GetPageUrl) = GetPage(Url => $PageUrl,FollowForward => 1);

      # get the ID for the trash folder
      # Only use the captured text if the match really succeeded, $1 could be a leftover otherwise.
      if ($PageAsString =~ m/sysFldrs\s*?:\s*?{\s*?trashFid\s*?:\s*?\"(.*?)\".*?}/si) {
        $TrashFolderId = $1;
      } else {
        $TrashFolderId = "";
        Display("Could not find the id of the trash folder.\n",MinVerbosity => 2);
      }

      # get the session variables as well as the request handler
      if ($PageAsString !~ m/fppCfg\s*?:\s*?{\s*?RequestHandler\s*?:\s*?\"(.*?)\".*?SessionId\s*?:\s*?\"(.*?)\".*?AuthUser\s*?:\s*?\"(.*?)\".*?}/si) {
        die "Could not find RequestHandler.";
      }
      $RequestHandler = $1;
      $SessionId = $2;
      $AuthUser = $3;
      die "Could not find RequestHandler." unless $RequestHandler;
    }

    # To start with we limit us to a MessagesArea between
    # <table class="dItemListContentTable"..> ... </table..>
    if ($PageAsString !~ m/<table class=\"d?ItemListContentTable[^>]*>(.*?)<\/table/si) {
      die "Could not correctly parse the messages table.";
    }
    my $MessagesArea = $1;

    # In this message area there's the body of the table containing messages.
    # <tbody ..> ... </tbody..>
    if ($MessagesArea !~ m/<tbody\s*.*?>(.*?)<\/tbody\s*>/si) {
      die "Could not correctly parse the messages table.";
    }
    $MessagesArea = $1;

    # MessagesArea now contains the body of the messages table.
    # Table rows <tr ..> .. : Description of the messages.
    # The class=".." tag hints on Unread or not.
    # Whatever is handled is cut from the front of MessagesArea, so each match finds the next element.
    while ($MessagesArea =~ m/<tr(.*?)>/si) {
      my $RowAttributes = $1;
      $MessagesArea = $';

      if ($RowAttributes =~ m/ContentItemUnread/i) {
        $MessagesRead[$NrMessagesDetected] = 0;
        $NrMessagesUnread++;
      } else {
        $MessagesRead[$NrMessagesDetected] = 1;
      }

      # A row without id and mad can not be handled later on, better to stop here than to
      # go on with the leftovers of an earlier match.
      if ($RowAttributes !~ m/id=\"(.*?)\".*?mad=\"(.*?)\"/si) {
        die "Parse error for 'id'.";
      }
      my $MessageId = $1;
      my $MessageAd = $2;

      # Goto the 4th cell of the row.(to get the from)
      my $TdLine = "";
      for (my $Idx=0;$Idx<4;$Idx++) {
        if ($MessagesArea !~ m/<td(.*?)>(.*?)<\/td\s*>/i) {
          die "Parse error for 'from'.";
        }
        $TdLine = $2;
        $MessagesArea = $';
      }
      if ($TdLine !~ m/class=\"(truncate)?from\">(<[^>]+>)?(.*?)<[^>]+>/si) {
        die "Parse error for 'from'.";
      }
      my $From = HtmlUnescape($3);
      Display("From '$From'.\n",MinVerbosity => 10);
      $MessagesFrom[$NrMessagesDetected] = $From;

      # Further to the subject column. There we pick up also the href of the message.
      if ($MessagesArea !~ m/<td(.*?)>(.*?)<\/td\s*>/i) {
        die "Parse error for 'subject'.";
      }
      $TdLine = $2;
      $MessagesArea = $';
      if ($TdLine !~ m/<a href=\"(.*?)\"\s*>(.*?)<\/a>/si) {
        die "Parse error for 'subject'.";
      }
      my $Subject = HtmlUnescape($2);
      Display("Subject '$Subject'.\n",MinVerbosity => 10);
      Display("MessageId '$MessageId'.\n",MinVerbosity => 10);
      Display("MessageAd '$MessageAd'.\n",MinVerbosity => 10);
      Display("Read '$MessagesRead[$NrMessagesDetected]'.\n",MinVerbosity => 10);

      $MessagesSubject[$NrMessagesDetected] = $Subject;
      $MessagesId[$NrMessagesDetected] = $MessageId;
      $MessagesAd[$NrMessagesDetected] = $MessageAd;

      $NrMessagesDetected++;
    }

    Display("Total messages reported : $ReportedNrMessages.\n" .
            "Nr messages detected : $NrMessagesDetected.\n" ,
             MinVerbosity => 10);

    # If the number of messages we detected already is smaller than the
    # reported total , we still have to look for another page and reloop.
    if ($NrMessagesDetected < $ReportedNrMessages) {
      $StillPageToGo = 1;
      Display("Search for one more page.\n",MinVerbosity => 10);
      # Search for 'next page' href
      # What is kept are the attributes of the <li> around the link, pnAm and pnAd are in there.
      my $NextPageAd = "";
      if ($PageAsString =~
          m/<li([^>]*)>\s*<a href=\"([^\"]+)\"[^>]*><img src=\"[^\"]*\" class=\"i_nextpage\".*?><\/a>/si) {
        $NextPageAd = $1;
      }
      die "Could not find an expected next page href. Probably page structure changed." unless $NextPageAd;

      if ($NextPageAd !~ m /pnAm=\"([^\"]*)/) {
        die "Could not find pnAm in '$NextPageAd'.";
      }
      $pnAm = $1;

      if ($NextPageAd !~ m /pnAd=\"([^\"]*)/) {
        die "Could not find pnAd in '$NextPageAd'.";
      }
      $pnAd = HtmlUnescape($1);
      $pnAd =~ s/\:/\\\:/g; # XXX JDLA seems necessary ...

      Display("Next page Ad : '$pnAd'.\n",MinVerbosity => 10);
      Display("Next page Am : '$pnAm'.\n",MinVerbosity => 10);
    }
  }
}
1116
########################################################################################################################
#
# Process the messages retrieved from a folder.
# Acts on global variables @Messages ...
# It just takes FolderIdx for knowing the name. (and now also for the MoveToFolder/Delete command)
#
# For each detected message that is not skipped (already in $DownloadedIdsFile, or read while
# only unread messages are wanted) : get the message, pipe it to $MailProcessor, register its
# id in $DownloadedIdsFile and, depending on the settings, mark it read, move it or delete it.
# Dies if one of the files can not be opened or if the mail processor fails.
#
########################################################################################################################

sub ProcessMessagesFromFolder ($) {
  my ($FolderIdx) = @_;
  my $FolderName = $FolderNames[$FolderIdx];

  # The ids that are in the file with downloaded Ids : upper cased id => id as written in the file.
  # The file is read once, when the first message comes by, in place of once per message.
  my %DownloadedIds;
  my $DownloadedIdsLoaded = 0;

  # Now let's run through all detected messages ..
  for (my $MessageIdx=0;$MessageIdx<$NrMessagesDetected;$MessageIdx++) {
    if ($DownloadedIdsFile) {
      if (not $DownloadedIdsLoaded) {
        # First we check and or create the file with the downloaded Ids.
        if (not -e $DownloadedIdsFile) {
          open (my $NewFile,'>',$DownloadedIdsFile) || die "Could not open $DownloadedIdsFile : $!.";
          print $NewFile "-- This is an automatically generated file by $0 containing the id of downloaded messages\n";
          close ($NewFile);
        }

        open (my $Downloaded,'<',$DownloadedIdsFile) || die "Could not open $DownloadedIdsFile : $!.";
        while (my $TmpId = <$Downloaded>) {
          chomp ($TmpId);
          # Ids are compared case independent. First one in the file wins.
          $DownloadedIds{uc($TmpId)} = $TmpId unless exists $DownloadedIds{uc($TmpId)};
        }
        close ($Downloaded);
        $DownloadedIdsLoaded = 1;
      }

      # All with this message if we downloaded already.
      my $IdKey = uc($MessagesId[$MessageIdx]);
      if (exists $DownloadedIds{$IdKey}) {
        Display("The message $MessageIdx with id '$DownloadedIds{$IdKey}' is already downloaded.\n",MinVerbosity => 10);
        next;
      }
    }

    next if ( ($FetchOnlyUnread =~ m/Yes/i) && ($MessagesRead[$MessageIdx] == 1) );

    # Identifying a bit the message for the log.
    Display("Handling mail\n".
            " from : '$MessagesFrom[$MessageIdx]'\n".
            " subject : '$MessagesSubject[$MessageIdx]'\n",MinVerbosity => 1);

    # JDLA getEmail , provided that HaveMessageAlready was not set.
    my $Message = GetEmail($MessageIdx,$FolderName);

    # Pipe it through a processor such as procmail.
    # The close reports if the processor itself failed.
    Display("Sending mail to '$MailProcessor'.\n",MinVerbosity => 1);
    open (my $Processor,'|-',$MailProcessor) ||
      die "Sending mail to '$MailProcessor' did not succeed. See error log.";
    print $Processor $Message;
    close ($Processor) || die "Sending mail to '$MailProcessor' did not succeed. See error log.";

    if ($DownloadedIdsFile) {
      # We don't have it yet. Add it to the downloaded.
      open (my $Downloaded,'>>',$DownloadedIdsFile) || die "Could not open $DownloadedIdsFile : $!.";
      print $Downloaded "$MessagesId[$MessageIdx]\n";
      close ($Downloaded);
      $DownloadedIds{uc($MessagesId[$MessageIdx])} = $MessagesId[$MessageIdx];
    }

    # And maybe we have to mark it read too ?
    if ($MarkRead =~ m/^Yes$/i) {
      MarkRead($MessageIdx);
    }

    # Maybe we even have to move it !
    if ($MoveToFolder ne "") {

      # If MoveToFolder is of the format @FileName, get the folder name from that FileName.
      # The global is overwritten with the name found, so the file is only read for the first message.
      if ($MoveToFolder =~ m/^\@(.*)$/) {
        my $MoveToFolderName = $1;
        # Spaces around the file name were never part of the name.
        $MoveToFolderName =~ s/^\s+//;
        $MoveToFolderName =~ s/\s+$//;
        open (my $In,'<',$MoveToFolderName) || die "Could not open '$MoveToFolderName' : $!";
        my $FirstLine = <$In>;
        close ($In);
        die "File '$MoveToFolderName' does not contain a folder name." unless defined $FirstLine;
        chomp $FirstLine;
        $MoveToFolder = $FirstLine;
      }

      # Do the move.
      MoveToFolder($MessageIdx,$MoveToFolder,$FolderIdx);
    }

    # Or maybe we have to remove it.
    if ($Delete =~ m/^Yes$/i) {
      DeleteMessage($MessageIdx,$FolderIdx);
    }

    Display("Done.\n",MinVerbosity => 1);
  }
}
1208
########################################################################################################################
#
# Get the list of the folders of the mailbox.
# Fetches the 'manage folders' page and adds, for each folder found in it, an entry to the
# global arrays @FolderHrefs, @FolderNames, @FolderNrMessages and @FolderIds. $NrFolders is
# the count of the folders and is increased on the fly.
# Dies if the server reports an internal error, if a folder has no FolderID in its href
# or if no folder at all is detected.
#
########################################################################################################################

sub GetFolders() {
  my $FoldersUrl = "${BaseUrl}ManageFoldersLight.aspx?$NParameter";
  my ($FolderPageAsString,$GetPageUrl) = GetPage(Url => $FoldersUrl,FollowForward => 1);

  die "Internal Server Error reported. Page structure might have changed."
    if $FolderPageAsString =~ m/Internal Server Error/i;

  # One folder in the page : the cell with the name (a link) followed by the cell with the total count.
  my $FolderPattern =
    qr/<td class=\"d?ManageFoldersFolderNameCol\"><a\s*href=\"([^\"]*)\"\s*>(.*?)<\/a>\s*<\/td>\s*<td class=\"d?ManageFoldersTotalCountCol[^\"]*\">(\d+)<\/td>/;

  # Scan the page for all folders, each match goes on where the previous one stopped.
  while ($FolderPageAsString =~ m/$FolderPattern/gc) {
    my ($Href,$RawName,$TotalCount) = ($1,$2,$3);

    $FolderHrefs[$NrFolders] = $Href;
    $FolderNames[$NrFolders] = HtmlUnescape($RawName);
    $FolderNrMessages[$NrFolders] = $TotalCount;

    # The id of the folder is part of its href.
    unless ($Href =~ m/FolderID=([^&]*)/) {
      die "Could not detect FolderId.";
    }
    $FolderIds[$NrFolders] = $1;

    Display(
     "Folder $NrFolders - $FolderIds[$NrFolders] - $FolderNames[$NrFolders] - $FolderNrMessages[$NrFolders].\n",
      MinVerbosity => 10);
    $NrFolders++;
  }

  die "No folders detected. Likely the page structure has changed." unless $NrFolders;
}
1240
########################################################################################################################
#
# The 'main' program.
# Creates the temporary directory, handles arguments and configuration, logs in, gets the
# folders and then gets and processes the messages of each folder that has to be processed.
#
########################################################################################################################

# Don't allow others to read our temp files
umask(077);
# The temporary directory creation.
mkdir($TmpDir) || die "Could not create $TmpDir : $!.";

ParseArgs();
ParseConfig();
DisplayIntroText();
Login();
GetFolders();

foreach my $FolderIdx (0 .. $NrFolders-1) {
  # If folders to process were given, all other folders are skipped.
  if (scalar keys %FoldersToProcess) {
    next unless exists $FoldersToProcess{lc $FolderNames[$FolderIdx]};
  }
  Display("\nProcessing folder $FolderNames[$FolderIdx].\n",MinVerbosity => 1);
  GetMessagesFromFolder($FolderIdx);
  Display("$NrMessagesDetected/$NrMessagesUnread Messages/Unread.\n",MinVerbosity => 1);
  # Works on the globals filled above. FolderIdx is for the folder name and for the move and delete.
  ProcessMessagesFromFolder($FolderIdx);
}
Display("All done.\n",MinVerbosity => 1);
CleanTempFiles();

exit(0);
1269
1270########################################################################################################################
1271#
1272# $Log: GetLive.pl,v $
1273# Revision 1.43 2008/07/05 19:55:41 jdla
1274# Bug 1962937 : Could not correctly parse the messages table
1275# (after MS started changing things again around 1/7/2008)
1276#
1277# Revision 1.42 2008/03/11 19:32:11 jdla
1278# Corrected stupidity (even did not compile) on previous change.
1279#
1280# Revision 1.41 2008/03/07 22:23:01 jdla
1281# Bug 1909801 : Locale does not work in Windows.
1282#
1283# Revision 1.40 2008/02/02 17:43:30 jdla
1284# Bug 1881842 : Does not handle folder names containing non-ASCII characters
1285#
1286# Revision 1.39 2008/01/19 18:44:32 jdla
1287# Bug 1875392 : Login on msn.com does fail !
1288#
1289# Revision 1.38 2008/01/19 12:30:55 jdla
1290# Bug 1871076 : GetLive died with Unexpected HTTP status : '100'
1291#
1292# Revision 1.37 2007/12/02 14:38:46 jdla
1293# *) Feature 1778902 : deletewhenread=yes option
1294#
1295# Revision 1.36 2007/12/02 11:15:40 jdla
1296# *) Feature 1792688 : Option to get a count of unread messages only
1297#
1298# Revision 1.35 2007/12/02 09:52:55 jdla
1299# *) Bug 1796107 : HTTP/500 etc should be catched.
1300#
1301# Revision 1.34 2007/11/11 19:46:31 jdla
1302# Merged in erroneously created branch 1.33.2.1.
1303#
1304# Revision 1.33.2.1 2007/11/11 19:39:05 jdla
1305# *) Bug 830063 : Doesn't work anymore on some accounts.
1306#
1307# Revision 1.33 2007/09/08 18:21:28 jdla
1308#
1309# *) Bug 1784876 : Command line parsing error.
1310# *) Bug 1789899 : Unable to Download.
1311#
1312# Revision 1.32 2007/09/04 21:14:39 jdla
1313# [ 1784876 ] bug in command line argument parser
1314#
1315# Revision 1.31 2007/08/24 17:16:54 jdla
1316# *) Bug 1780285 : MARK READ
1317#
1318# Revision 1.30 2007/08/23 21:40:07 jdla
1319# *) Bug 1779371 : Manageforlderslight error
1320#
1321# Revision 1.29 2007/08/22 21:28:01 jdla
1322# *) Bug 1779788 : Some Accounts do not work.
1323#
1324# Revision 1.28 2007/08/21 21:22:01 jdla
1325#
1326# *) Revamping to catch up with MS changing the login to live login.
1327# From now on only supports 'Live' boxes.
1328# Please convert old ones. It's lossless.
1329#
1330# Revision 1.27 2007/08/18 07:52:43 jdla
1331#
1332# *) Bug 1774546 (second part, because in fact two unrelated bugs
1333# were entered into the same) :
1334# Live or dead: Could not find expected url
1335# (After change of interface by MS)
1336#
1337# Revision 1.26 2007/08/16 12:56:36 jdla
1338#
1339# *) Bug 1774546 : Live or dead: Could not find expected url.
1340# (After change of interface by MS)
1341#
1342# Revision 1.25 2007/08/04 19:49:16 jdla
1343# *) Changed Curl quoting to support Windows (thx to 'gharkink').
1344# *) Adapted SmtpForward.pl (also thx to 'gharkink').
1345# *) Added alternate SmtpAuthForward.pl (thx to 'runemaagensen').
1346# *) Update manual with above (and the info on working versions)
1347#
1348# Revision 1.24 2007/07/29 14:35:38 jdla
1349#
1350# *) Bug 1763128 : msn.com problems : See submitted patch 1758859
1351# *) Inclusion of sample SmtpForward.pl in the distribution.
1352#
1353# Revision 1.23 2007/06/24 17:37:41 jdla
1354#
1355# *) Bug 1742447 : Could not find expected url.
1356# (After change of interface by MS)
1357# *) Bug 1742493 : GetLive doesn't die on wrong 'MailProcessor'.
1358#
1359# Revision 1.22 2007/06/19 20:30:26 jdla
1360# *) Bug 1739263 : --verbosity 0 should be silent.
1361# *) Request 1724728 : only fetch unread messages w/o id file
1362#
1363# Revision 1.21 2007/05/24 19:13:03 jdla
1364#
1365# *) Bug 1722346 : MoveToFolder : sometimes read , sometimes not read.
1366#
1367# Revision 1.20 2007/05/23 22:02:18 jdla
1368# *) Bug 1722346 : MoveToFolder : sometimes read, sometimes not read.
1369# Now for sure (and thanks to a tool Live Http Headers in Mozilla)
1370# solved decently for the Live branch.
1371#
1372# Revision 1.19 2007/05/22 19:49:07 jdla
1373# *) Bug 1722346 : MoveToFolder : sometimes read , sometimes not read.
1374# Solved (I think ...) for the 'Dead' (old gotmail) mode.
1375#
1376# Revision 1.18 2007/05/20 18:53:47 jdla
1377# *) MoveToFolder now possible on downloading.
1378#
1379# Revision 1.17 2007/05/20 12:45:34 jdla
1380# Merged in revision 1.16.2.1, which was committed to the release branch by mistake.
1381#
1382# Revision 1.16.2.1 2007/05/20 12:39:34 jdla
1383# *) MarkRead is now possible on downloading.
1384#
1385# Revision 1.16 2007/05/18 17:22:43 jdla
1386# *) Request 1721287 : Folder selection
1387#
1388# Revision 1.15 2007/05/18 14:59:21 jdla
1389# *) Bug 1719819 : Improve error message if Downloaded not specified.
1390#
1391# Revision 1.14 2007/05/18 14:10:40 jdla
1392# *) After the problem of Alex [dahaas] in which gotmail (the predecessor
1393# of GetLive) was not able to correctly load his account, an overhaul
1394# was made for correcting the counting of the messages per folder and
1395# for detection of the correct NextPage url (page=n&wo=...) in his case.
1396# Confirmed working for him and no regression for me.
1397#
1398# Revision 1.13.2.7 2007/05/18 12:19:12 jdla
1399# Overhaul in the detection of the number of messages per folder.
1400# Now as per suggestion of Alex [Dahaas] (dahaas@hotmail.com) based
1401# on the 'Manage Folders' function that is in Hotmail (the old and
1402# the live one).
1403#
1404# Revision 1.13.2.6 2007/05/18 08:10:43 jdla
1405# Further moving around of debug output for Alex' problem.
1406#
1407# Revision 1.13.2.5 2007/05/18 07:37:18 jdla
1408# Additional debug output ...
1409#
1410# Revision 1.13.2.4 2007/05/16 15:04:22 jdla
1411# Removed wrong debugoutput in a non-matched branch of if statement.
1412#
1413# Revision 1.13.2.3 2007/05/16 14:06:45 jdla
1414# Some additional debug info and some further adaptation of the pattern
1415# matching for NrMessages detection.
1416#
1417# Revision 1.13.2.2 2007/05/16 13:12:57 jdla
1418# Some improved debug info.
1419# NextPage search : corrected non-greedy search to negated class search.
1420#
1421# Revision 1.13.2.1 2007/05/15 21:03:56 jdla
1422# Try solving a stubborn problem coming from gotmail :
1423# [ 1714743 ] Gotmail fails to download if box contains >100 messages
1424#
1425# Revision 1.13 2007/05/14 17:29:31 jdla
1426# *) Support 1717590 : error message => Classic named Dead now.
1427#
1428# Revision 1.12 2007/05/12 09:47:55 jdla
1429# *) Support 1717590 : error message => Improved error message.
1430#
1431# Revision 1.11 2007/05/07 18:27:52 jdla
1432# *) Bug 1714417 : execution fails if the config file name contains a dot
1433#
1434# Revision 1.10 2007/05/05 11:48:42 jdla
1435#
1436# *) Bug 1713304 : Strange characters in 'Processing folder Verwijderd'.
1437#
1438# Revision 1.9 2007/05/04 19:00:57 jdla
1439#
1440# *) Bug 1712959 : GetLive chokes on hotmail folders with 'Concepts' in it.
1441# *) Bug 1712958 : File with Ids incompatible between gotmail and GetLive.
1442#
1443# Revision 1.8 2007/05/02 22:01:23 jdla
1444# Comparison of downloaded Ids on a case independent way.
1445#
1446# Revision 1.7 2007/04/22 15:32:10 jdla
1447# Changed some MinVerbosity settings.
1448# Spelling error corrected.
1449# Reported Messages calculation only on first page !
1450#
1451# Revision 1.6 2007/04/22 10:17:19 jdla
1452# Corrected problem with fetching unread mail on Classic.
1453# (due to line colour attribute).
1454# Corrected problem of one page mailboxes by assuming that the
1455# number of reported messages equals the number of found messages.
1456# (but still warning on the situation to crosscheck)
1457#
1458# Revision 1.5 2007/04/20 22:24:28 jdla
1459# Added Name keyword for release tracking.
1460#
1461# Revision 1.4 2007/04/19 20:40:30 jdla
1462# Correction of the DisplayIntroText.
1463#
1464# Revision 1.3 2007/04/19 19:04:04 jdla
1465# Added case insensitivity to Mode check.
1466# Removed a MainPageAsString redefinition that screwed up Live mailbox
1467# fetch.
1468# Added initialization of NrMessagesDetected to the
1469# GetMessagesFromFolderLive.
1470#
1471# Revision 1.2 2007/04/18 21:49:22 jdla
1472# Bug correction : Initialization of NrMessagesDetected before each
1473# folder.
1474#
1475# Revision 1.1.1.1 2007/04/18 18:58:10 jdla
1476# Initial version of GetLive
1477#
1478#
1479########################################################################################################################
1480
1481# vim:et:sw=2:ts=2:filetype=perl:columns=120:lines=50:

Archive Download the corresponding diff file

Branches:
master