这个脚本的作用是从 天涯社区/天涯真我/美媚秀 版块中提取图片地址,并集中到若干页面中方便浏览。
运行后会生成 index.html、index.html.1、index.html.2 这样的文件,用浏览器打开 index.html 即可。
每个页面集中 20 张图片。
代码: 全选
#!/bin/bash
#
# Setup stage of the Tianya image-gallery scraper: defines the URLs and
# scratch-file names used by the rest of the script and empties the first
# output page and the image-URL accumulator.
#
# NOTE(review): a shebang only takes effect on the very first line of a file.
# If descriptive text precedes this line in the saved file, remove it (or run
# the script explicitly with `bash <file>`).

printf 'Starting...\n This will take some minutes,Please wait....\n'

# Site root; relative links scraped from the pages are appended to this.
baseurl="http://cache.tianya.cn"

# Scratch file: the listing page exactly as downloaded.
out="tianyamyself.shtml"

# Scratch file: the article page currently being scanned for images.
tmp="tmp.html"

# URL of the first listing page. It must be a single line: a line break inside
# the quotes would become part of the URL. The strSubItem value is
# percent-encoded (presumably the GB2312 bytes of the sub-board name --
# TODO confirm).
dist="${baseurl}/new/Publicforum/ArticlesList.asp"
dist+="?strItem=tianyamyself&idWriter=0&Key=0&Part=1"
dist+="&strSubItem=%C3%C0%C3%C4%D0%E3&strSubItem2="

# First gallery page; follow-up pages are named "${result}.1", "${result}.2", ...
result="index.html"

# Start from empty files so earlier runs do not leak into this one.
: > "$result"
: > pictmp.html

# Number of listing pages to crawl.
loop=1

# Scratch file: the listing page after conversion to UTF-8.
output=tmp1.html
# ---------------------------------------------------------------------------
# Crawl stage: fetch up to $loop listing pages, visit every article linked
# from them and append the image URLs found there to pictmp.html (one per
# line).
#
# Reads:  baseurl, dist, loop, out, output, tmp (set earlier in the script)
# Writes: dist (advanced to the next listing page), loop (counted down),
#         the scratch files named by out/output/tmp, and pictmp.html
# ---------------------------------------------------------------------------

# Print the site-relative article paths found in a listing page.
# stdin: listing HTML; stdout: one path per line.
# Splits after every "<a href=", keeps the pieces mentioning "shtml" and takes
# the text between the first pair of single quotes.
extract_article_links() {
  sed -e 's/<a href=/&\n/g' \
    | sed -n -e '/shtml/p' \
    | awk -F "'" '{ print $2 }'
}

# Print the path of the next listing page, or nothing if there is none.
# stdin: listing HTML (UTF-8); stdout: at most one line.
# Takes the line(s) containing the "next page" label, splits after every
# "href=", discards the first two pieces and keeps the text before the first
# ">" of the next one.
# NOTE(review): which href is the "next" link depends on the site's markup and
# cannot be verified from this script alone -- confirm against a live page.
extract_next_page_path() {
  grep "下一页" \
    | sed -e 's/href=/&\n/g' \
    | sed -e '1,2d' \
    | awk -F '>' '{ print $1 }' \
    | head -n 1
}

# Print the image URLs found in an article page.
# stdin: article HTML; stdout: one URL per line.
# Source lines mentioning advertisement/logo/Google are dropped entirely, and
# the last remaining image is dropped too (presumably page furniture --
# TODO confirm). The URL is the text between the first pair of double quotes
# after "<img src=".
extract_image_urls() {
  sed -e 's/<img src=/\n&/g' -e '/advertisement/d;/logo/d;/Google/d;' \
    | sed -n -e '/<img src=/p' \
    | sed -e '$d' \
    | awk -F '"' '{ print $2 }'
}

# An unset or non-numeric counter evaluates to 0 here, so the loop is skipped
# instead of spinning forever.
while (( ${loop:-0} > 0 )); do
  if ! wget -q --timeout=30 -O "$out" "$dist"; then
    printf 'warning: cannot fetch listing page: %s\n' "$dist" >&2
    break
  fi

  # The site serves GB2312; convert so the UTF-8 pattern above can match.
  # The exit status is deliberately ignored: -c drops unconvertible bytes and
  # iconv may still report failure for them.
  iconv -c -f GB2312 -t UTF-8 -o "$output" "$out"

  # Converted back to GB2312 because the path is sent to the server as-is.
  next_path=$(extract_next_page_path < "$output" | iconv -c -f UTF-8 -t GB2312)

  # Read line by line so links are neither word-split nor glob-expanded.
  while IFS= read -r link; do
    [[ -n "$link" ]] || continue
    if wget -q --timeout=30 -O "$tmp" "${baseurl}${link}"; then
      extract_image_urls < "$tmp" >> pictmp.html
    else
      printf 'warning: cannot fetch article: %s\n' "${baseurl}${link}" >&2
    fi
  done < <(extract_article_links < "$output")

  dist="${baseurl}${next_path}"
  loop=$(( loop - 1 ))

  # Without a next-page link there is nothing left to crawl.
  [[ -n "$next_path" ]] || break
done
# ---------------------------------------------------------------------------
# Gallery stage: de-duplicate the collected image URLs, write them into
# paginated HTML pages, remove the scratch files and report completion.
# ---------------------------------------------------------------------------

#######################################
# Write bare-bones HTML gallery pages, one <img> tag per URL.
# Arguments:
#   $1 - file holding one image URL per line (blank lines are skipped)
#   $2 - path of the first page; follow-up pages are "$2.1", "$2.2", ...
#   $3 - images per page (optional, default 20)
# Outputs:
#   (Re)creates the page files; writes nothing to stdout.
# Returns:
#   0 on success, 1 on unusable arguments or an unwritable page.
#######################################
write_gallery_pages() {
  local list=$1
  local first_page=$2
  local per_page=${3:-20}
  local page=$first_page
  local next_page url
  local count=0

  [[ -r "$list" && -n "$first_page" ]] || return 1
  [[ "$per_page" =~ ^[1-9][0-9]*$ ]] || return 1

  # Every page is truncated before use so output from a previous run is never
  # mixed in; the first page is emptied here as well so the function does not
  # depend on the caller having done it.
  : > "$page" || return 1

  while IFS= read -r url || [[ -n "$url" ]]; do
    [[ -n "$url" ]] || continue

    # Roll over only when another image actually follows, so the last page
    # never links to a page that does not exist.
    if (( count > 0 && count % per_page == 0 )); then
      next_page="${first_page}.$(( count / per_page ))"
      # Pages sit side by side, so the link uses the bare file name.
      printf '<br><p align=center><a href="%s">Next</a></p>\n' \
        "${next_page##*/}" >> "$page"
      page=$next_page
      : > "$page" || return 1
    fi

    # Attribute values are quoted: URLs routinely contain '=' and may contain
    # other characters that are not valid in an unquoted attribute.
    printf '<img src="%s"><br>\n' "$url" >> "$page"
    count=$(( count + 1 ))
  done < "$list"
}

sort -u pictmp.html > pic.html
write_gallery_pages pic.html "${result:-index.html}"

# ${var:+...} leaves out scratch names that were never set.
rm -f -- pic.html pictmp.html ${out:+"$out"} ${output:+"$output"} ${tmp:+"$tmp"}
echo "Done."
echo "You can open ./index.html to look it."