本帖最后由 WHY 于 2018-9-24 23:03 编辑
JS 脚本:依照第4行的样子修改第5行(var map),即可选择要下载的栏目。
或者:把第4行前面的注释符号(//)删掉,再删掉第5行,可以下载全部7个栏目。
- var fso = new ActiveXObject('Scripting.FileSystemObject');
var http = new ActiveXObject('Microsoft.XMLHTTP');

//var map = {'元曲':'YuanQu','诗经':'ShiJing','楚辞':'ChuCi','乐府诗集':'YueFu','其它古代':'Other','近现代诗':'XianDai','外国诗词':'ForeignPoesy'};
var map = {'诗经':'ShiJing','楚辞':'ChuCi', '近现代诗':'XianDai'};
// `map` selects the categories to download: each key is the category name
// (also used as the log file name), each value is the URL prefix of that
// category's index page. The commented-out line above is the full set.
var home = 'http://www.chinapoesy.com/';

// Crawl every selected category: open its index page, walk the
// popularity-ranked listing page by page, and hand every entry to getPoesy().
// `key` is deliberately left as a global: writeToFile() reads it to pick the
// log file of the category currently being crawled.
for (var key in map) {
    var isFirstPage = true;
    var url = home + map[key] + 'Index' + (key === '外国诗词' ? '.aspx' : '.html');
    var reg = /href='(.+?\.html)'\r?\n\s*target="_blank"/g;
    while (url !== '') {
        var txt = getText(url);
        if (isFirstPage) {
            isFirstPage = false;
            // Does the popularity ranking have a "more..." link?
            var m1 = txt.match(/\(按照人气排名\).*?href='(.*?\.html)'/);
            if (m1) {
                // Yes: the complete ranking lives on its own (paged) page.
                txt = getText(home + m1[1]);
            } else {
                // No: the ranking is the table embedded in the index page.
                var ranked = txt.match(/\(按照人气排名\)[\s\S]+?<\/table>/);
                if (!ranked) {
                    WSH.Echo('No popularity ranking found for ' + key + ', skipped: ' + url);
                }
                txt = ranked ? ranked[0] : '';
            }
        }
        // Is there a "next page" link?
        var m2 = txt.match(/href="\/([^"]*\.html)"[^<>]*><img src="\/Images\/Pager\/nextn\.gif"/);
        url = m2 ? home + m2[1] : '';
        if (m2) {
            // On a paged listing only the result table is scanned for links.
            var listing = txt.match(/>查询中 请稍后…[\s\S]+?<\/table>/);
            if (!listing) {
                WSH.Echo('No result table found for ' + key + ', page skipped');
            }
            txt = listing ? listing[0] : '';
        }
        var arr;
        while ((arr = reg.exec(txt))) {
            getPoesy(home + arr[1]);
        }
    }
}
-
/**
 * Fetches a web page and returns its content as text.
 *
 * The request is synchronous and goes through the global `http` object.
 * The raw response bytes are written into an ADODB.Stream and read back as
 * text with Charset "UTF-8", so the page is always decoded as UTF-8.
 *
 * @param {string} url  Absolute URL to download.
 * @returns {string} The response body decoded as UTF-8.
 */
function getText(url) {
    http.open('GET', url, false);   // false = synchronous request
    http.send();

    var stream = new ActiveXObject('ADODB.Stream');
    stream.Mode = 3;                // read/write
    stream.Type = 1;                // binary, to accept the raw response bytes
    stream.Open();
    try {
        stream.Write(http.responseBody);
        stream.Position = 0;        // rewind before switching to text mode
        stream.Type = 2;            // text
        stream.Charset = 'UTF-8';
        return stream.ReadText(-1); // -1 = read everything
    } finally {
        // One stream is created per request; always release it.
        stream.Close();
    }
}
-
/**
 * Downloads every poem linked from a poem listing page, following the
 * "next page" pager, and appends one line per poem through writeToFile().
 *
 * Each written line is: title with popularity count, a Tab, then the poem
 * text as produced by formatPoesyText().
 *
 * Poem pages that cannot be parsed are skipped; the remaining poems and
 * pages are still processed.
 *
 * @param {string} url  Absolute URL of the first listing page.
 */
function getPoesy(url) {
    // Group 1: relative URL of the poem page; group 2: markup up to and
    // including the "(digits)" popularity count that follows the title.
    var reg = /href='(.+?\.html)'\s*target\s*=\s*"_blank"([\s\S]+?\(\d+\))/g;
    var arr;
    while (url !== '') {
        var txt = getText(url);
        // Is there a "next page" link?
        var m = txt.match(/href="\/([^"]*\.html)"[^<>]*><img src="\/Images\/Pager\/nextn\.gif"/);
        url = m ? home + m[1] : '';

        // Only the part of the page between these two links holds the poem list.
        var section = txt.match(/>编辑它<\/a>[\s\S]+?>关于我们<\/a>/);
        if (!section) continue;

        reg.lastIndex = 0;
        while ((arr = reg.exec(section[0]))) {
            var s = getText(home + arr[1]); // poem page
            s = s.split('("#loading").css("display","none");')[1];
            // Some poem URLs cannot be opened; skip them and keep going.
            if (!s) continue;
            s = s.split('<script type="text/javascript">')[0];

            // Title + popularity count: drop whitespace, then everything up
            // to the last ">" (the leftover markup in front of the title).
            var title = arr[2].replace(/[ \r\n]+/g, '').replace(/.*>/, '');
            writeToFile(title + '\t' + formatPoesyText(s));
        }
    }
}

/**
 * Helper for getPoesy(): turns the HTML fragment of one poem into a single
 * line of text in which line breaks are kept as "<br />" tags.
 *
 * Entity names are written with "\x26" (the "&" character) so that they are
 * not decoded into the very characters they stand for when this script is
 * pasted into an HTML page.
 *
 * @param {string} s  HTML fragment containing the poem.
 * @returns {string} Cleaned single-line text.
 */
function formatPoesyText(s) {
    // Add <br /> after a Chinese character or punctuation mark that ends a line or paragraph.
    s = s.replace(/([\u4E00-\u9FFF。?!,、:”)》】… —])(?:\r?\n|<\/p>)/g, '$1<br />');
    // Remove every tag except line-break tags, and remove spaces and HTML-escaped spaces.
    s = s.replace(/<(?!\/?br)[^>]*>|\x26nbsp;|[ \u00A0]+/g, '');
    // <br>, </br> and <br/> all become <br /> (the previous step also stripped the space inside <br />).
    s = s.replace(/<\/?br>|<br\/>/g, '<br />');
    s = s.replace(/\r?\n/g, '');
    s = s.replace(/^(?:<br \/>)+/, '');   // drop leading line breaks
    s = s.replace(/(<br \/>)+$/, '$1');   // collapse trailing line breaks into one
    // Decode the character entities left in the text.
    s = s.replace(/\x26middot;/g, '·').replace(/\x26ldquo;/g, '“').replace(/\x26rdquo;/g, '”');
    s = s.replace(/\x26mdash;/g, '—').replace(/\x26quot;/g, '"');
    s = s.replace(/(?:<br \/>)+/, '\t');  // first run of line breaks becomes a Tab
    s = s.replace(/(<br \/>)+/, '$1');    // the next run of line breaks collapses into one
    return s;
}
-
/**
 * Appends one line of text to the log file of the category being crawled.
 *
 * The file name is the global `key` (the loop variable of the top-level
 * category loop) followed by ".Log".
 *
 * NOTE(review): no format argument is passed to OpenTextFile, so the file is
 * opened in its default (non-Unicode) text mode; confirm that every scraped
 * character can be written that way.
 *
 * @param {string} str  Text to append; WriteLine adds the line terminator.
 */
function writeToFile(str) {
    // 8 = open for appending; true = create the file if it does not exist.
    var objFile = fso.OpenTextFile(key + '.Log', 8, true);
    try {
        objFile.WriteLine(str);
    } finally {
        // Release the handle even when the write fails.
        objFile.Close();
    }
}
-
// Tell the user that every selected category has been processed.
WSH.Echo('Done');
复制代码
|