批处理之家's Archiver

ivor 发表于 2016-2-21 22:35

python采集搜索引擎关键字

python做爬虫的人真多,我就练练手[code]# Python 3.5.1
# coding:utf-8
# 采集搜索引擎关键字
import urllib.request, re

# Seed words whose search suggestions are fetched from the sug.so.360.cn
# suggest endpoint; every returned suggestion is printed on its own line.
text = ["北京", "上海", "青岛"]

# The request headers are identical for every seed word, so build them once.
# The original also sent a header literally named "GET" whose value was the
# URL. The HTTP method and target belong on the request line, which urllib
# writes itself, so that bogus entry is dropped.
headers = {
    "Host": "sug.so.360.cn",
    "Referer": "http://www.so.com/",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36",
}

# Matches a double-quoted run whose first character is in U+4E00..U+9FA5
# and captures the text between the quotes. Compiled once, outside the loop.
suggestion_pattern = re.compile("\"([\u4e00-\u9fa5].*?)\"")

for choice in text:
    # Percent-encode the seed word so it is safe inside the query string.
    keyword = urllib.request.quote(choice)
    url = "http://sug.so.360.cn/suggest?callback=suggest_so&encodein=utf-8&encodeout=utf-8&format=json&fields=word,obdata&word=" + keyword
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the HTTP response deterministically; the
    # timeout keeps a stalled server from hanging the script forever.
    with urllib.request.urlopen(req, timeout=10) as response:
        html_decode = response.read().decode("utf-8")
    for item in suggestion_pattern.findall(html_decode):
        print(item)

input("Press Enter key to continue……")[/code]

页: [1]

Powered by Discuz! Archiver 7.2  © 2001-2009 Comsenz Inc.