[原创] Perl 下载百度新歌500 代码更新~

软件和网站开发以及相关技术探讨
回复
ximi_x
帖子: 2
注册时间: 2006-10-01 0:19

[原创] Perl 下载百度新歌500 代码更新~

#1

帖子 ximi_x » 2008-03-29 12:20

我也凑个热闹,发个perl的,写的比较仓促,不过小demo应该可以完成最基本的功能了 :D

tips :改成多线程的了,默认为5,只下载mp3(可改),只下载文件大于3M的(可改),wget太慢换成mytget,以后再想到什么再改吧:)

tips ag: 原来的top100,发现歌太难听,换成500就好多了:)

原始地址:http://hi.baidu.com/ximix/blog/item/7dd ... 5593a.html

代码: 全选

#!/usr/bin/perl
#Author: yisudong
#Date: Sat Mar 29 12:10:57 CST 2008
#Contact: yisudong at gmail dot com or http://hi.baidu.com/ximix

use strict;
use Thread;
use HTTP::Request::Common;
use HTTP::Status qw(is_client_error is_server_error is_redirect);
use Data::Dumper;
require LWP::UserAgent;

my $down_PATH = '/usr/tmp/mp3';
#my $url = "http://list.mp3.baidu.com/list/newhits.html?top1";
my $url = "http://list.mp3.baidu.com/topso/mp3topsong.html?top2";
my $MAX_THREADS = 5;

my $ua = new LWP::UserAgent;
$ua->agent('yisuD_Robot');

my $request = HTTP::Request->new(GET => "$url");
my $response = $ua->simple_request( $request );
my $str = $response->content;
#print "$str";

my @aa = $str =~ m/class=\"border\"\>(\d+)\.\<\/td\>[\s\S]+?href\=\"(.*)\"\starget\=_blank\>/mg;
my %bb = @aa;


foreach my $n (sort { $a <=> $b } keys %bb)
{
        print "$n.$bb{$n}\n";
        my $url2 = $bb{$n};
        my $request = HTTP::Request->new(GET => "$url2");
        my $response = $ua->simple_request( $request );
        my $str = $response->content;

        my @cc = $str =~ m/\<td\sclass\=tdn\>\d+\<\/td\>[\s\S]+?href\=\"(.*)\"\s+title[\s\S]+?\<td\>(\d+\.\d)\sM\<\/td\>/mg;
        my %t ;
        foreach my $n(0..$#cc)
        {
                next if($n % 2 == 0);
                push @{$t{$cc[$n]}},$n-1;
        }

#       print Dumper(\%t);

        my @t_arr = Thread->list();
        my $t_num = @t_arr;
        print "[$t_num]\n";
        if($t_num < $MAX_THREADS)
                {
                        Thread->new(\&down,\%t,\@cc,$n);
                }
        sleep(5);

}


sub down
{
my ($tt_p,$ccc_p,$n) = @_;
my %tt = %$tt_p;
my @ccc = @$ccc_p;

L:{
        foreach my $nn (reverse sort keys %tt)
        {
                next if($nn < 3);
                foreach my $nnn (@{$tt{$nn}})
                {
                        my $url3 = $ccc[$nnn];
                        my $request = HTTP::Request->new(GET => "$url3");
                        my $response = $ua->simple_request( $request );
                        my $str = $response->content;

                        my ($song_url) = $str =~ m/\<a\shref\=\"(.*)\"\>/m;
                        next if($song_url !~ m/\.mp3/i);#just wanna mp3
                        print "$n.$nn.$nnn.$song_url\n";

#                       my $wget = "wget -T 300 -t 3 -q $song_url -O $down_PATH/$n.mp3";#wget单线程太慢了……
                        my $wget = "mytget -n 10 -c 3 -d $down_PATH -f $n.mp3 '$song_url'";
                        print "\t$wget\n";
                        if(system($wget) == 0 and (-s "$down_PATH/$n.mp3") >3000000 )
                        {
                                last L; 
                        }
                        else
                        {
                                system("rm -rf $down_PATH/$n.mp3");
                        }

                }
        }
}

my $retval= Thread->self->eval();
if ($@) { warn "thread failed: $@"; } else { print "thread returned $retval\n"; }

}

exit;
ximi_x
帖子: 2
注册时间: 2006-10-01 0:19

#2

帖子 ximi_x » 2008-04-10 10:48

借楼再问个问题

关于多线程的,为什么$mutex->up后$mutex不增加呢,最后$mutex->down到0就锁住了,很奇怪啊

代码: 全选

use Thread ;
use Thread::Semaphore;

my $oldfh = select STDOUT; $| = 1; select $oldfh;

$mutex = Thread::Semaphore->new(5);

while(1)
{
my $w =$mutex->down();
print "$w\n";
Thread->new(\&xx);
sleep(1);
}
sub xx
{
print "begin:";
my $s = int(rand 3);
print "$s";
my @a = Thread->list();
my $b = @a;
print "[$b]\n";
sleep($s);
#print "end\n";
$mutex->up();
Thread->self->detach;
}
tobe
帖子: 1
注册时间: 2008-04-06 0:45

#3

帖子 tobe » 2008-07-18 8:04

专门来谢谢的
这种socket变成对于perl 也应该是一件简单的事情啊
回复