Reputation: 660
# Crawl the condortk.com search results, follow each listing link and each
# product link found on it, and try to store the product image with
# LWP::Simple's getstore().
use WWW::Mechanize;
use strict;
use warnings;
use LWP::Simple;
# $ctime and $output_file are assigned here but not used again in this script.
my $ctime = time();
# Host name only (no "http://" scheme); prepended to image paths further down.
my $Home_page = 'www.condortk.com/';
my $output_file = "www.condortk.com-$ctime";
my $url = 'http://www.condortk.com/results.php?keyword=&Buscar.x=4&Buscar.y=7';
my $m = WWW::Mechanize->new();
# Fetch the search results page and keep its HTML for the outer loop.
$m->get( $url ) or die "unable to get $url";
my $Home_Con = $m->content;
# Create the Images directory via the shell if it does not exist yet.
system( 'mkdir Images' ) if ( !-d "Images" );
# $next and $page are not used again in this script.
my $next = '';
my $page = '';
# Outer loop: one iteration per link inside a <div class="producto">.
while ( $Home_Con =~ m/<div class="producto"><a href="([^>]*?)"/igs ) {
#print "Loop 1\n";
# The captured href is passed to get() exactly as it appears in the page.
$m->get( $1 );
my $list_content = $m->content;
my $img_name = '';
my $img_folder = '';
my $image = '';
my $big_image = '';
# Inner loop: one iteration per link inside a <div class="nombre2">.
while ( $list_content =~ m/<div\s*class="nombre2"><a\s*href="([^>]*?)"/igs ) {
#print"Loop2\n";
my $desc = '';
my $desc1 = '';
# $block is built but never read afterwards.
my $block = $Home_page . $1;
$m->get( $1 );
my $content1 = $m->content;
# Pick the src attribute of the image that follows id="imgproducto".
if ( $content1 =~ m/id="imgproducto"><img src="([^<]*?)"/is ) {
$img_name = $1;
# print "$img_name\n";
# Capture whatever follows the first "/word/word/" segment of the image path.
if ( $img_name =~ m/\/\w+\/\w+\/([^>]*?)$/is ) {
$img_folder = $1;
#print "$img_folder\n";
#print "$Home_page.$img_name\n";
# NOTE(review): the first argument is $Home_page . $img_name, which has no
# "http://" scheme, and the second argument is the captured tail of the image
# path, which is not prefixed with the Images/ directory created above.
getstore( $Home_page . $img_name, $img_folder );
}
}
}
}
I am trying to download and save specific images from the site, but I am not sure why I am not able to get those images. I believe I have used the right regular expressions for the images. The part I have doubts about is the getstore method. I need some guidance. Thanks in advance.
Upvotes: 1
Views: 2289
Reputation: 7931
The regular expressions only work by luck; they are not robust. See the 'wrong regex' comments. Also, getstore(url, file) needs a complete URL such as http://host.org/dir/file as its first argument and a local file path such as path/to/file.jpg as its second argument.
Here is a working version:
use strict;
use warnings;
use File::Basename qw(basename);
use LWP::Simple;
use WWW::Mechanize;

# Download every product image reachable from the condortk.com search
# results page into the local Images/ directory.
#
# Flow: results page -> each "producto" listing page -> each "nombre2"
# product page -> the <img> that follows id="imgproducto".
# Files that already exist in Images/ are skipped, so the script can be
# re-run to resume an interrupted download.

my $Home_page = 'www.condortk.com/';
my $base_url  = "http://$Home_page";
my $url = 'http://www.condortk.com/results.php?keyword=&Buscar.x=4&Buscar.y=7';
my $image_dir = 'Images';

# With autocheck enabled every failed get() dies with a descriptive
# message, so the individual calls below need no "or die".
my $m = WWW::Mechanize->new( autocheck => 1 );
$m->get($url);
my $Home_Con = $m->content;

# Use the built-in mkdir instead of shelling out, and stop early if the
# target directory cannot be created.
if ( !-d $image_dir ) {
    mkdir $image_dir or die "unable to create $image_dir: $!";
}

LISTING:
while ( $Home_Con =~ m/<div class="producto"><a href="([^"]*?)"/igs ) {    # <-- wrong regex (was [^>]*?)
    # Copy the capture at once: later matches overwrite $1.
    my $listing_href = $1;

    #print "Loop 1 $listing_href \n";
    $m->get( $base_url . $listing_href );    # <-- wrong url (scheme and host were missing)
    my $list_content = $m->content;

  PRODUCT:
    while ( $list_content =~ m/<div\s*class="nombre2"><a\s*href="([^"]*?)"/igs ) {    # <-- wrong regex (was [^>]*?)
        my $product_href = $1;

        #print "Loop 2 $product_href \n";
        # NOTE(review): a relative href is expected to be resolved by
        # WWW::Mechanize against the page fetched just before.
        $m->get($product_href);
        my $product_html = $m->content;

        next PRODUCT
            unless $product_html =~ m/id="imgproducto"><img src="([^"]*?)"/is;
        my $img_name = $1;

        # wrong url and 'save as' parameters:
        # getstore() needs a complete URL and a local file path.  Only the
        # last path component is used as the file name, so the file always
        # lands directly inside $image_dir instead of in a sub-directory
        # that was never created.
        my $basename = basename($img_name);
        next PRODUCT if $basename eq '';

        # Avoid "http://host//path" when the src starts with a slash, and
        # leave an already absolute URL untouched.
        ( my $img_path = $img_name ) =~ s{\A/+}{};
        my $get
            = $img_name =~ m{\Ahttps?://}i
            ? $img_name
            : $base_url . $img_path;
        my $save = "$image_dir/$basename";

        if ( -f $save ) {
            print "Skipping $save\n";
            next PRODUCT;
        }

        my $code = getstore( $get, $save );
        print "$get --> $save : $code\n";

        # Report failures instead of silently continuing; the extra -f test
        # catches a download whose file never made it to disk.
        if ( !is_success($code) || !-f $save ) {
            warn "download of $get failed (status $code)\n";
        }
    }
}
-
$ perl dl.img.pl
http://www.condortk.com/.../60504s_725x0.jpg --> Images/Images/60504s_725x0.jpg : 200
http://www.condortk.com/.../60508s_725x0.jpg --> Images/Images/60508s_725x0.jpg : 200
http://www.condortk.com/.../60501s_725x0.jpg --> Images/Images/60501s_725x0.jpg : 200
http://www.condortk.com/.../60020s_725x0.jpg --> Images/Images/60020s_725x0.jpg : 200
Upvotes: 1
Reputation: 13792
If you are searching for images, use the image-specific methods of WWW::Mechanize, which may help you reach what you are looking for. Example:
# Collect the images of the current page whose URL matches the pattern
# (per the WWW::Mechanize documentation for find_all_images / url_regex).
my @images = $mech->find_all_images( url_regex => qr/productos-detalle\.php/ );
Upvotes: 2