python 爬虫 《一》get请求 post请求 +伪装

网友投稿 1046 2022-09-02

python 爬虫 《一》get请求 post请求 +伪装

python 爬虫 《一》get请求 post请求 +伪装

"""GET / POST request examples with header spoofing, fake client IP, and proxy use.

Reconstructed from a garbled, single-line Python 2 snippet (urllib2 / httplib /
BeautifulSoup) into runnable Python 3 (urllib.request / http.client).  The
original article's target URLs were truncated by the page scraper; they are
replaced with the EXAMPLE_URL placeholder below — TODO: fill in real targets.
"""
import http.client
import random
import urllib.parse
import urllib.request

# Placeholder for the URLs lost in the original scrape (`url = " restdata...`).
EXAMPLE_URL = "http://example.com/"

# User-Agent strings used by the original article to disguise the crawler.
FIREFOX_UA = ('Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) '
              'Gecko/20100101 Firefox/22.0')
CHROME_UA = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
             '(KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36')


def getUrllibFun(url):
    """GET ``url`` via urllib.request with a spoofed Firefox User-Agent.

    Returns the raw response body as ``bytes``.  Raises ``urllib.error.URLError``
    on connection failure or timeout (20 s, as in the original).
    """
    headers = {'User-agent': FIREFOX_UA}
    req = urllib.request.Request(url, headers=headers)
    # `with` guarantees the socket is closed even if read() raises
    # (the original called res_data.close() manually and leaked on error).
    with urllib.request.urlopen(req, timeout=20) as res_data:
        return res_data.read()


def getHttplibFun(urlf, port=80):
    """GET ``urlf`` using http.client (the article's "second method").

    The original source was garbled (``import getUrllibFun(urlf,port):`` and
    ``conn = conn.request(...)``); this is the evidently intended form: open an
    HTTPConnection, issue a GET with a spoofed User-Agent, return body bytes.
    Renamed from the duplicate ``getUrllibFun`` so both functions can coexist.
    """
    headers = {'User-agent': FIREFOX_UA}
    parts = urllib.parse.urlsplit(urlf)
    conn = http.client.HTTPConnection(parts.hostname, parts.port or port,
                                      timeout=20)
    try:
        path = parts.path or "/"
        if parts.query:
            path += "?" + parts.query
        conn.request(method="GET", url=path, headers=headers)
        res = conn.getresponse()
        return res.read()
    finally:
        # Original closed res/conn only on the success path; always close here.
        conn.close()


def postHttpFun(requrl, datapamse):
    """POST the mapping ``datapamse`` to ``requrl`` as a form-encoded body.

    Returns the raw response body as ``bytes``.
    """
    # urlencode produces str in Python 3; Request data must be bytes.
    data_urlencode = urllib.parse.urlencode(datapamse).encode('utf-8')
    headers = {'User-agent': FIREFOX_UA}
    req = urllib.request.Request(url=requrl, data=data_urlencode,
                                 headers=headers)
    with urllib.request.urlopen(req, timeout=20) as res_data:
        return res_data.read()


def download_html(url):
    """GET ``url`` while spoofing the client IP via an X-Forwarded-For header.

    NOTE(review): X-Forwarded-For only fools servers that blindly trust the
    header; it does not change the real source address of the request.
    Returns the raw response body as ``bytes``.
    """
    ips = ['121.31.159.197', '175.30.238.78', '124.202.247.110']
    header = {
        'user-agent': CHROME_UA,
        # random.choice replaces the original ip[random.randint(0, 2)].
        'X-Forwarded-For': random.choice(ips),
    }
    request = urllib.request.Request(url, None, header)
    with urllib.request.urlopen(request) as response:
        return response.read()


def dlgetFun(url, dlPort):
    """GET ``url`` through the HTTP proxy ``dlPort`` ("host:port" string).

    Returns the response decoded as text.  The original parsed the body with
    BeautifulSoup (a third-party package); callers that need a parse tree can
    wrap the return value with ``bs4.BeautifulSoup`` themselves.
    """
    header = {
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/60.0.3112.113 Safari/537.36"),
        "Accept": ("text/html,application/xhtml+xml,application/xml;q=0.9,"
                   "image/webp,image/apng,*/*;q=0.8"),
        # The original also sent "Accept-Encoding: gzip,deflate" but read the
        # body as plain text; omit it so the server replies uncompressed.
        "Accept-Language": "zh-CN,zh;q=0.8",
    }
    # The proxy dict literal was truncated in the scrape (`proxy={" ...`);
    # this is the standard ProxyHandler mapping for an HTTP proxy.
    proxy = {"http": dlPort}
    proxy_support = urllib.request.ProxyHandler(proxy)
    # Use a local opener instead of install_opener so we don't mutate the
    # process-global opener as a side effect.
    opener = urllib.request.build_opener(proxy_support)
    req = urllib.request.Request(url, headers=header)
    with opener.open(req, timeout=5) as response:
        return response.read().decode('utf-8', errors='replace')


if __name__ == "__main__":
    # Demo driver (the original had several conflicting __main__ guards;
    # merged into one). The scraped URLs were lost — EXAMPLE_URL stands in.
    url = EXAMPLE_URL
    print(getUrllibFun(url))
    print(getHttplibFun(url))
    test_data = {'qwe': 'qqq', 'qqqqw': 'qwew'}
    print(postHttpFun(url, test_data))
    print(download_html(url))
    # Original sample proxy "111.114.613.191:9000" has an invalid octet (613);
    # corrected to a syntactically valid placeholder.
    dlport = "111.114.61.191:9000"
    print(dlgetFun(url, dlport))

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:五分钟看完 Linux 重点知识,建议收藏!(雷雨五分钟看完)
下一篇:Linux不重启识别新挂载磁盘
相关文章

 发表评论

暂时没有评论,来抢沙发吧~