duminică, 8 noiembrie 2015

ATELIER -> Extragerea datelor din site-uri HTML cu Perl

Acest script verifică, pe site-ul http://www.portabilitate.ro, dacă un număr de telefon a fost portat și afișează rezultatele într-o fereastră de terminal, după cum se vede în imagine.
#!/usr/bin/perl
# Acest script verifica nr 0722270796 la ce retea este abonat
# pe siteul PORTABILITATE.ro
use warnings;
use strict;
# Utilizarea modulului LWP::Simple
use LWP::Simple;

# Result page on portabilitate.ro for the phone number being looked up.
my $url = 'http://www.portabilitate.ro/ro-no-0722270796';

# Fetch the page body; get() returns undef on failure, so stop right away.
my $content = get($url);
die "Nu s-a incarcat $url" unless defined $content;

# In list context head() returns (content type, document length, ...) and an
# empty list when the request fails.  Print the length followed by the type,
# skipping any field that is missing so no "uninitialized value" warning
# is raised.
my @site = head($url);
my @header_fields = grep { defined $_ } @site[1, 0];
print @header_fields, "\n";

# First sanity check: does the page mention "Numarul" at all?
if ($content =~ m/Numarul/s) {
    print 'Am gasit linia Numarul';
}
else {
    print "Nu am gasit linia";
}

# Second sanity check: is the "current operator" link present?  Sample markup:
#   <td style="padding:10px;" align="right"><span id="ctl00_cphBody_lblCurrentOperator">Operator curent:</span></td>
#   <td style="color:red;padding:10px; font-weight:bold;"><a id="ctl00_cphBody_lnkOperator">RCS &amp; RDS</a></td>
# NOTE(review): the success message is the same text as in the check above.
if ($content =~ m/id="ctl00_cphBody_lnkOperator"/s) {
    print 'Am gasit linia Numarul';
}
else {
    print "Nu am gasit linia";
}

# Current operator: the text inside the lnkOperator anchor.
my ($m) = $content =~ m/<a id="ctl00_cphBody_lnkOperator">(.*?)<\/a>/;
print "\n Am gasit Operatorul: $m" if defined $m;

# Page title, which spans several lines in the markup (hence /s), e.g.:
#   <span class="ContentTitle">
#       Numarul 0722270796 este portat
#   </span>
my ($nr) = $content =~ m/<span class="ContentTitle">(.*?)<\/span>/s;
print "\n $nr" if defined $nr;

# Previous operator, e.g.:
#   <a id="ctl00_cphBody_lnkOperatorInitial">VODAFONE ROMANIA</a>
# The link may be absent from the page, so guard the print exactly like the
# two lookups above instead of interpolating an undefined value.
my ($operator) = $content =~ m/<a id="ctl00_cphBody_lnkOperatorInitial">(.*?)<\/a>/;
print "\n Operatorul vechi: $operator" if defined $operator;
# my @matches;
# while ($content =~ /Numarul/g) {
 # push @matches, $1;
 
# # }
# foreach my $m(@matches){
 # print $m, "\n";
# }
Resurse:
http://www.perlmonks.org/?node_id=10698
#!/usr/bin/perl

use warnings;
use strict;

use LWP::Simple;

# Result page on portabilitate.ro for the phone number being looked up.
my $url = 'http://www.portabilitate.ro/ro-no-0722270796';

# Fetch the page body; get() returns undef on failure, so stop right away.
my $content = get($url);
die "Nu s-a incarcat $url" unless defined $content;

# In list context head() returns (content type, document length, ...) and an
# empty list when the request fails.  Print the length followed by the type,
# skipping any field that is missing.
my @site = head($url);
my @header_fields = grep { defined $_ } @site[1, 0];
print @header_fields, "\n";

if ($content =~ m/Numarul/s) {
    print 'Am gasit linia Numarul';

    # Sample markup for the current operator:
    #   <td style="padding:10px;" align="right"><span id="ctl00_cphBody_lblCurrentOperator">Operator curent:</span></td>
    #   <td style="color:red;padding:10px; font-weight:bold;"><a id="ctl00_cphBody_lnkOperator">RCS &amp; RDS</a></td>
    # The operator name is the text *inside* the anchor, so capture it from
    # the downloaded page.  This is top-level script code, not a subroutine,
    # so the value is printed rather than returned.
    $content =~ m{<a id="ctl00_cphBody_lnkOperator">\s*(.*?)\s*</a>}s
        or die "Nu am gasit operatorul in pagina";
    print "\n Am gasit Operatorul: $1";
}
else {
    print "Nu am gasit linia";
}

# Independent check that the "current operator" link id occurs in the page.
if ($content =~ m/id="ctl00_cphBody_lnkOperator"/s) {
    print 'Am gasit linia Numarul';
}
else {
    print "Nu am gasit linia";
}

#!/usr/bin/perl -w

use strict;
use LWP::Simple;

# Base URL of the forecast lookup; a ZIP code is appended as the query value.
my $url = "http://www.wunderground.com/cgi-bin/findweather/getForecast?"
        . "query=";

# Download both forecast pages.  get() returns undef when a request fails,
# so report which download failed instead of passing undef to the parser.
my $ca = get("${url}95472"); # Sebastopol, California
my $ma = get("${url}02140"); # Cambridge, Massachusetts
die "Could not fetch the forecast for 95472 (California)\n"    unless defined $ca;
die "Could not fetch the forecast for 02140 (Massachusetts)\n" unless defined $ma;

# current_temp() extracts the temperature reading from a page (it dies when
# the page holds none).
my $ca_temp = current_temp($ca);
my $ma_temp = current_temp($ma);
my $diff = $ca_temp - $ma_temp;

# A zero difference means neither place is warmer; say so rather than
# declaring Massachusetts "warmer by 0 degrees".
if ($diff == 0) {
  print "California and Massachusetts are equally warm.\n";
}
else {
  print $diff > 0 ? "California" : "Massachusetts";
  print " is warmer by ", abs($diff), " degrees F.\n";
}

# Extract the temperature reading from the HTML of a forecast page.
#
# Argument: the page HTML as a single string.
# Returns:  the number that follows the "Temperature" table cell; a leading
#           minus sign and a decimal fraction are accepted, so below-zero and
#           fractional readings are returned intact.
# Dies:     with "No temp data?" when the argument is undefined or the
#           expected table row is not present.
sub current_temp {
  my ($html) = @_;

  # An undefined page (e.g. a failed download) carries no data; fail with the
  # usual message instead of matching against undef and raising a warning.
  die "No temp data?" unless defined $html;

  $html =~ m{<tr ><td>Temperature</td>\s+<td><b>(-?\d+(?:\.\d+)?)}
    or die "No temp data?";
  return $1;
}


Niciun comentariu:

Trimiteți un comentariu