凑热闹贴,没有进一步优化。
- rem 从诗文txt就可知已经下载的进度
- rem Scrape works from www.52shici.com for listing types 1 through 20.
- rem For every type the listing page is downloaded, the links to the
- rem individual works are extracted, and each work is fetched, converted
- rem from UTF-8 to GBK, reduced to its content section and appended to a
- rem per-type text file.
- rem External tools required on PATH: wget, sed, iconv, htox32c.
- for /l %%g in (1,1,20) do (
- rem Drop the listing of the previous type so that wget -c cannot try to
- rem resume the new download into the stale file.
- if exist index.html del index.html
- wget --user-agent="Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092416 Firefox/3.0.3" --no-check-certificate -e robots=off -c -N -t 0 -T 10 -O index.html "http://www.52shici.com/original.php?type=%%g"
- rem Keep the lines from the text-dashed marker up to the pages marker,
- rem then drop the lines that mention works-info.
- sed -n '/text-dashed/,/pages/'{p} index.html>>teste.ini
- findstr /iv "works-info" teste.ini>>dizhi.ini
- del teste.ini
- rem Put every double quote on its own line followed by the site root, so a
- rem quoted relative link ends up on one line as an absolute address.
- sed -i "s/\"/\n\"\nhttp:\/\/www.52shici.com\//g" dizhi.ini
- rem Only lines containing php are treated as addresses of works.
- findstr /i "php" dizhi.ini>>dizhi.inf
-
- rem The only delimiter is the asterisk, so a line without one is passed
- rem whole to the loop variable.
- for /f "delims=*" %%i in (dizhi.inf) do (
- wget --user-agent="Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.3) Gecko/2008092416 Firefox/3.0.3" --no-check-certificate -e robots=off -c -N -t 0 -T 10 -O wz.ini "%%i"
- iconv -c -f utf-8 -t GBK "wz.ini" >wz1.ini
- rem Take the author line from the page that was just downloaded. It goes
- rem through a scratch file first because a file must not be read and
- rem appended to within the same pipeline.
- findstr /i "author" wz1.ini|findstr /i "nbsp">author.ini
- type author.ini>>wz1.ini
- sed -n '/works-content/,/works-info/'{p} wz1.ini>>testw.ini
- rem Overwrite instead of append so that a work converted earlier is not
- rem converted and written out a second time.
- findstr /iv "works-info" testw.ini>zw.html
- rem NOTE review: htox32c is expected to write zw.txt from zw.html, confirm.
- htox32c /IP /O0 zw.html>nul 2>nul
- type zw.txt>>诗文%%g.txt
- del zw.txt
- if exist zw.html del zw.html
- rem Removes every scratch file of this work, dizhi.ini included.
- del *.ini
- )
- del *.inf
- )
- pause
复制代码
|