tips :改成多线程的了,默认为5,只下载mp3(可改),只下载文件大于3M的(可改),wget太慢换成mytget,以后再想到什么再改吧:)
tips ag: 原来的top100,发现歌太难听,换成500就好多了:)
原始地址:http://hi.baidu.com/ximix/blog/item/7dd ... 5593a.html
代码: 全选
#!/usr/bin/perl
#Author: yisudong
#Date: Sat Mar 29 12:10:57 CST 2008
#Contact: yisudong at gmail dot com or http://hi.baidu.com/ximix
use strict;
use Thread;
use HTTP::Request::Common;
use HTTP::Status qw(is_client_error is_server_error is_redirect);
use Data::Dumper;
require LWP::UserAgent;
my $down_PATH = '/usr/tmp/mp3';
#my $url = "http://list.mp3.baidu.com/list/newhits.html?top1";
my $url = "http://list.mp3.baidu.com/topso/mp3topsong.html?top2";
my $MAX_THREADS = 5;
my $ua = new LWP::UserAgent;
$ua->agent('yisuD_Robot');
my $request = HTTP::Request->new(GET => "$url");
my $response = $ua->simple_request( $request );
my $str = $response->content;
#print "$str";
my @aa = $str =~ m/class=\"border\"\>(\d+)\.\<\/td\>[\s\S]+?href\=\"(.*)\"\starget\=_blank\>/mg;
my %bb = @aa;
foreach my $n (sort { $a <=> $b } keys %bb)
{
print "$n.$bb{$n}\n";
my $url2 = $bb{$n};
my $request = HTTP::Request->new(GET => "$url2");
my $response = $ua->simple_request( $request );
my $str = $response->content;
my @cc = $str =~ m/\<td\sclass\=tdn\>\d+\<\/td\>[\s\S]+?href\=\"(.*)\"\s+title[\s\S]+?\<td\>(\d+\.\d)\sM\<\/td\>/mg;
my %t ;
foreach my $n(0..$#cc)
{
next if($n % 2 == 0);
push @{$t{$cc[$n]}},$n-1;
}
# print Dumper(\%t);
my @t_arr = Thread->list();
my $t_num = @t_arr;
print "[$t_num]\n";
if($t_num < $MAX_THREADS)
{
Thread->new(\&down,\%t,\@cc,$n);
}
sleep(5);
}
sub down
{
my ($tt_p,$ccc_p,$n) = @_;
my %tt = %$tt_p;
my @ccc = @$ccc_p;
L:{
foreach my $nn (reverse sort keys %tt)
{
next if($nn < 3);
foreach my $nnn (@{$tt{$nn}})
{
my $url3 = $ccc[$nnn];
my $request = HTTP::Request->new(GET => "$url3");
my $response = $ua->simple_request( $request );
my $str = $response->content;
my ($song_url) = $str =~ m/\<a\shref\=\"(.*)\"\>/m;
next if($song_url !~ m/\.mp3/i);#just wanna mp3
print "$n.$nn.$nnn.$song_url\n";
# my $wget = "wget -T 300 -t 3 -q $song_url -O $down_PATH/$n.mp3";#wget单线程太慢了……
my $wget = "mytget -n 10 -c 3 -d $down_PATH -f $n.mp3 '$song_url'";
print "\t$wget\n";
if(system($wget) == 0 and (-s "$down_PATH/$n.mp3") >3000000 )
{
last L;
}
else
{
system("rm -rf $down_PATH/$n.mp3");
}
}
}
}
my $retval= Thread->self->eval();
if ($@) { warn "thread failed: $@"; } else { print "thread returned $retval\n"; }
}
exit;