代码: 全选
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2008 UbuntuChina <http://forum.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>
# hack by ct <ctqucl AT gmail.com>
# code modify by david <zwssd190 AT 163.com> 增加了关于对话框
import pygtk
import gtk
import httplib
import re
import urllib
import os
import locale
def startdownload(start, chart='0'):
    """Announce the run and publish the download settings as globals.

    Args:
        start: Not used by this code; kept so existing
            ``startdownload("start")`` calls keep working.
        chart: Id of the chart page to download from, '0'..'8' (see the
            table below). A value that is not in the table is stored in
            ``topid`` unchanged. Defaults to '0', the value that used to
            be hard-coded.

    Sets the globals ``fsize`` (minimum file size in MB), ``allowext``
    (accepted file extensions) and ``topid`` (chart page path).

    NOTE(review): the indentation of this file was lost; the helper
    definitions that follow may originally have been part of this
    function's body -- confirm when restoring the layout.
    """
    print("程序将开始下载工作......")
    # allowext is read as a free name by axeldownmp3, so it is published
    # the same way as fsize and topid instead of staying a plain local.
    global topid, fsize, allowext
    fsize = 1  # lower bound on file size (MB)
    allowext = ['.mp3', '.wma']  # accepted extensions
    charts = {
        '0': '/list/newhits.html',         # new songs 100
        '1': '/topso/mp3topsong.html',     # Top500
        '2': '/list/oldsong.html',         # classic oldies
        '3': '/list/movies.html',          # movie hits
        '4': '/list/tvs.html',             # TV songs
        '5': '/minge/mp3topsong.html',     # folk song selection
        '6': '/xiaoyuan/mp3topsong.html',  # campus songs
        '7': '/list/liujinsuiyue.html',    # golden years (new)
        '8': '/list/yaogun.html',          # rock zone
    }
    # Same fall-through as the old if/elif chain: unknown ids pass through.
    topid = charts.get(chart, chart)
def getdownfileurl(url): #获取歌曲页的试听URL
url = "http://220.181.38.82/m"+url
count = url.index('" ');
url = url[:count]
tn = re.search('&tn=(.*)&word',url).group(0)
url=url.replace(tn,'&tn=baidusg,mp3%20%20&word')
print u"正在处理",url
try:
urlopen = urllib.URLopener()
fp=urlopen.open(url)
data = fp.read()
fp.close()
except IOError, errmsg:
print errmsg
expression2='"_blank">(.*)</a></li>'
url = re.search(expression2, data).group(0)[16:-9]
try:
url="http://"+urllib.quote(url)
except:pass
print u"发现 "+url
return url
def getdownurl(url):
    """Scrape a song page for candidate downloads.

    Args:
        url: Request path sent to ``mp3.baidu.com``.

    Returns:
        A list of ``(file_url, size_text)`` pairs. ``file_url`` is the
        result of ``getdownfileurl`` for each link on the page, or ``''``
        when that call returns a false value. ``size_text`` is the text
        captured before ``M</td>``. The two lists are paired with
        ``map(None, ...)``, so if their lengths differ the shorter side
        is padded with ``None``.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()
    links = re.findall('http://202.108.23.172/m(.*)target', html)  # link list
    sizes = re.findall('<td>(.*)M</td>', html)  # file sizes
    # Turn every link into its final download address ('' if unresolved).
    resolved = [getdownfileurl(link) or '' for link in links]
    return map(None, resolved, sizes)
def downmp3(url,author,name,filelist): #下载歌曲
filename=author+"-"+name;
for i in filelist:
name=unicode(i,locale.getpreferredencoding())
if name.find(filename) == 0: #忽略
print u"文件已经下载,忽略。"
return 1
urllists=getdownurl(url) #获取文件url列表
lineno=0
while lineno<len(urllists):
print u"尝试",urllists[lineno][0]
ext=urllists[lineno][0][-4:] #获取文件名后缀(最后4位)
try:
lineno+=1
print urllists[lineno-1][1] +'M'
if float(urllists[lineno-1][1])>float(fsize) : #大小符合则下载
urlopen = urllib.URLopener()
fp=urlopen.open(urllists[lineno-1][0])
data = fp.read()
fp.close()
filename=filename+ext;
file=open(filename,'w+b')
file.write(data)
file.close()
print u"下载成功!"
return 1
elif float(urllists[lineno][1])<float(fsize) : #不符则略过
print u"文件太小,忽略!"
except:
continue
return 0
def axeldownmp3(url,author,name,filelist): #使用axel下载歌曲,Ubuntu用户需要使用 sudo apt-get install axel安装.
filename=author+"-"+name;
for i in filelist:
name=unicode(i,locale.getpreferredencoding())
if name.find(filename) >= 0: #忽略
print u"%s 文件已经下载,忽略。"%filename
return 1
print u'获取文件列表'
urllists=getdownurl(url) #获取文件url列表
lineno=0
print u"获得",len(urllists),"个下载地址"
while lineno<len(urllists):
print u"尝试",urllists[lineno][0]
#print urllists
ext=urllists[lineno][0][-4:].lower() #获取文件名后缀(最后4位)
if not ext in allowext:
print u"文件扩展名 %s 名不允许,忽略"%ext
lineno+=1
continue
try:
lineno+=1
print urllists[lineno-1][1] +'M'
if float(urllists[lineno-1][1])>float(fsize) : #大小符合则下载
savefilename=filename+ext;
if os.spawnlp(os.P_WAIT,'axel','-q','-n 20','-o '+savefilename,urllists[lineno-1][0])==0:
print u"下载成功!"
return 1
elif float(urllists[lineno][1])<float(fsize) : #不符则略过
print u"文件太小,忽略!"
except:
continue
return 0
if __name__ == "__main__":
conn = httplib.HTTPConnection('list.mp3.baidu.com')
conn.request("GET",topid ) #类型
response = conn.getresponse()
html=response.read().decode('gbk')
conn.close()
expression1='border">(.*).</td>'
expression2='><a href="http://mp3.baidu.com/m(.*)</a>'
expression3='href="http://mp3.baidu.com/m(.*)</td>'
listSentence1 = re.findall(expression1, html) #编号特征
listSentence2 = re.findall(expression2, html) #歌曲名特征
listSentence3 = re.findall(expression3, html) #歌手名特征
lineno=0
while lineno<len(listSentence1):
url=re.search('(.*)target',listSentence2[lineno])
url='/m'+url.group(0)[:-8]
idno=listSentence1[lineno]
name=re.search('blank>(.*)',listSentence2[lineno])
name=name.group(0)[6:]
dirty=re.search('</A>/<A href=(.*) target=_blank>',listSentence3[lineno])
if dirty is not None : #合唱
author1=re.search('>(.*)</A>/<A',listSentence3[lineno])
author1=author1.group(0)[1:-7]
author2=re.search('/<A href=(.*)</A>',listSentence3[lineno])
author2=re.search('>(.*)<',author2.group(0))
author2=author2.group(0)[1:-1]
author=author1 + '+' + author2
elif dirty is None : #独唱
author=re.search('blank>(.*)</',listSentence3[lineno])
author=author.group(0)[6:-2]
name=name.strip()
author=author.strip()
print u"开始下载",idno,name,author,u"来自",url
filelist=os.listdir('.');
if axeldownmp3(url,author,name,filelist)==0: #判断失败
print u"下载",author,name,u'失败!'
lineno+=1
class baidump3:
__license__ = """
baidump3下载器 1.0.0 for python
请保留作者的名字的前题下,随便改,随便发布!哈哈
"""
__authors__ = ["""
Author: oneleaf <oneleaf AT gmail.com>
hack by ct <ctqucl AT gmail.com>
code modify by david <zwssd1980 AT 163.com>
增加了关于对话框
"""]
def __init__(self):
self.window = gtk.Window()
self.about_dialog = gtk.AboutDialog()
self.about_dialog.set_name("baidump3下载器")
self.about_dialog.set_version("1.0.0")
self.about_dialog.set_license(self.__license__)
self.about_dialog.set_authors(self.__authors__)
self.about_dialog.connect("response", self.aboutDialogHide)
self.about_dialog.show()
self.about_dialog.connect("destroy",self.kill)
def kill(self, event):
gtk.main_quit()
print "程序已经退出!"
def main(self):
gtk.main()
def aboutDialogHide(self, widget, event):
self.about_dialog.hide()
# Module-level counters left by the original counting loop:
# i ends at 1000 and j holds 0 + 1 + ... + 999.
i = 1000
j = sum(range(i))

# Publish the settings, then show the About dialog and enter the main loop.
startdownload("start")
baidump3().main()