package WWW::Hotmail;

use Carp qw(croak);
use base 'WWW::Mechanize';

use 5.006;
use strict;
use warnings;

our $VERSION = '0.08';

# Error reporting used by the methods below: on failure they store a
# message in $errstr and then either croak with it (when $croak_on_error
# is true) or return a false value. HTTP-level failures additionally store
# the response's HTML error page in $errhtml; other failures call
# error2html(), which is defined elsewhere in this file.
our $croak_on_error = 0;
our $errstr         = '';
our $errhtml        = '';

# Constructor. Takes no arguments besides the class name. Builds the
# underlying WWW::Mechanize object with an MSIE 6.0 user-agent string and
# an in-memory cookie jar, and returns it.
sub new {
    my $class = shift;

    # avoid complaints from M$ by using IE 6.0
    my $self = $class->SUPER::new(
        agent => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    );
    $self->cookie_jar({});

    return $self;
}

# Logs in to Hotmail.
#
#   $email - full e-mail address; it must contain "@name.tld", because the
#            domain part selects which login form action is used
#   $pass  - account password
#
# Returns 1 on success, after storing the value of the mailbox page's _UM
# variable in $self->{_WWWHotmail_base} (messages() appends it to its
# request URLs) and setting $self->{_WWWHotmail_logged_in}.
# On failure sets $errstr and returns undef, or croaks with the same
# message when $croak_on_error is true.
sub login {
    my ($self, $email, $pass) = @_;

    # Copy the captures into lexicals right away instead of relying on
    # $1/$2 surviving until they are needed.
    my ($host, $tld) = $email =~ m/\@([^.]+)\.(.+)/;
    unless (defined $host) {
        $errstr = 'You must supply full email addres as the username';
        croak $errstr if $croak_on_error;
        $self->error2html();
        return undef;
    }
    my $domain = lc($host . '_' . $tld);

    my $resp = $self->get('http://www.hotmail.com/');
    unless ($resp->is_success) {
        $errstr = $resp->as_string();
        croak $errstr if $croak_on_error;
        $errhtml = $resp->error_as_HTML;
        return undef;
    }

    # bypass the js detection page
    if ($self->content() =~ m/hiddenform/i) {
        $self->form_name('hiddenform');
        $self->submit();
    }

    $self->form_name('form1');

    # this SHOULD cover charter.com, compaq.net, hotmail.com, msn.com,
    # passport.com, and webtv.net
    # all this java regex crap is needed just for this feature. Maybe this
    # can be done better?
    #
    # $domain comes from the caller's e-mail address, so it is quoted with
    # \Q...\E: a multi-part domain such as "foo_co.uk" must match literally
    # rather than have its "." treated as a regex wildcard.
    my ($action) = $self->content() =~ m#name="\Q$domain\E" action="([^"]+)"#;

    # current_form returns a HTML::Form obj; only ask for it once we know
    # there is an action to install, and treat a missing form like any
    # other unexpected page layout instead of dying on an undef object.
    my $form = defined $action ? $self->current_form() : undef;
    unless (defined $form) {
        $errstr = 'hotmail format changed or email domain not used with Hotmail';
        croak $errstr if $croak_on_error;
        $self->error2html();
        return undef;
    }
    $form->action($action);

    $self->field(login  => $email);
    $self->field(passwd => $pass);
    $resp = $self->click('submit1');    # finally!
    unless ($resp->is_success) {
        $errstr = $resp->as_string;
        croak $errstr if $croak_on_error;
        $errhtml = $resp->error_as_HTML;
        return undef;
    }

    # The response redirects via a URL=... value inside a double-quoted
    # attribute. Stop at the first closing quote so that anything else on
    # the same line cannot leak into the captured URL.
    my ($redirect) = $self->content() =~ /URL=([^"]+)"/;
    unless (defined $redirect) {
        $errstr = 'Hotmail format changed!';
        croak $errstr if $croak_on_error;
        $self->error2html();
        return undef;
    }

    # Report a failed redirect as the HTTP error it is, in the same way as
    # the two requests above, rather than as a generic login failure.
    $resp = $self->get($redirect);
    unless ($resp->is_success) {
        $errstr = $resp->as_string;
        croak $errstr if $croak_on_error;
        $errhtml = $resp->error_as_HTML;
        return undef;
    }

    # look for the base url for the mailbox
    my $page = $self->content();
    if ($page =~ m/_UM\s*=\s*"([^"]+)";?/) {
        $self->{_WWWHotmail_base} = $1;
    }
    elsif ($page =~ m!http://login\.passport\.net/uilogin\.srf!) {
        # Being sent back to the Passport login page means the credentials
        # were rejected.
        $errstr = 'Couldn\'t log in to Hotmail, username or password incorrect';
        croak $errstr if $croak_on_error;
        $self->error2html();
        return undef;
    }
    else {
        $errstr = 'Couldn\'t log in to Hotmail';
        croak $errstr if $croak_on_error;
        $self->error2html();
        return undef;
    }

    $self->{_WWWHotmail_logged_in} = 1;

    return 1;
}

sub messages {
    my $self = shift;
    unless (defined($self->{_WWWHotmail_logged_in})) {
        $errstr = 'Not logged in!';
        croak $errstr if $croak_on_error;
        $self->error2html();
        return ();
    }
    my $last_page = 1;
    my $i = 1;
    $self->{_WWWHotmail_msgs} = ();
    # traverse all pages
    while ($i <= $last_page) {
        # sorting avoids getting the same message twice
        $self->get('/cgi-bin/HoTMaiL?'.$self->{_WWWHotmail_base}."&page=$i&Sort=rDate");
        # this finds the ->| link (last page)
        if ($i == 1 && $self->{content} =~ m/'page=(\d+)'/i) {
            $last_page = $1;
        }
        # replace javascript junk
        # and adapt it to grab 'from' AND 'subjects'
        # TODO this can be done better
        my $content = $self->content();
        $content =~ s/\r|\n| //g;
        $content =~ s/javascript\:G\('([^']+)'\)">([^<]+)<\/a><\/td>