React 前端框架助力企业快速适应数字化转型的挑战与机遇
640
2022-11-12
Python | 《国王排名》漫画爬取并合成pdf文件
ლ(′◉❥◉`ლ)
这里仅对下面两篇随笔做个合并,就是每爬取完一章的漫画图片,就立刻生成一个pdf文件。
Python 爬取《国王排名》漫画;Python | 图片转pdf
"""Scrape every chapter of the comic "国王排名" and build one PDF per chapter.

Workflow: fetch the chapter list from the series page, download each
chapter's page images into ``<cwd>/国王排名/<chapter name>/`` and, as soon as
a chapter has been downloaded, merge its images into a PDF in the current
working directory.
"""
from reportlab.lib.pagesizes import portrait
from reportlab.pdfgen import canvas
from PIL import Image
from pyquery import PyQuery
import requests
import execjs
import glob
import re
import os

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
}

# NOTE(review): every URL literal was stripped from the published copy of this
# script. The host below is inferred from the DM5_* variables the chapter page
# exposes, and the series path is a best guess -- TODO confirm both before use.
BASE_URL = 'https://www.dm5.com'
url = BASE_URL + '/manhua-guowangpaiming/'

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30


def getOne(url):
    """Return the chapter list of the series page at ``url``.

    Args:
        url: Address of the comic's series (table-of-contents) page.

    Returns:
        A list of ``[chapter_name, chapter_url]`` pairs in the reverse of the
        order in which the page lists them.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = PyQuery(response.content.decode('utf-8'))
    # The hrefs are site-relative, so prefix them with the host.
    url_One = [
        [link.text(), BASE_URL + link.attr("href")]
        for link in html("#detail-list-select-1 li a").items()
    ]
    url_One.reverse()
    return url_One


def _page_var(text, pattern):
    """Return the stripped first capture of ``pattern`` in ``text``.

    Raises:
        ValueError: If the pattern does not occur in ``text``.
    """
    match = re.search(pattern, text)
    if match is None:
        raise ValueError(f'pattern not found in chapter page: {pattern}')
    return match.group(1).strip()


def getTwo(chapters_url):
    """Download all pages of each chapter and convert each chapter to a PDF.

    Args:
        chapters_url: Iterable of ``(chapter_name, chapter_url)`` pairs, as
            returned by ``getOne``. Each ``chapter_url`` is used as a prefix
            for the image API, so it is expected to end with ``/``.

    Raises:
        ValueError: If a chapter page lacks one of the expected ``DM5_*``
            JavaScript variables.
    """
    pathOne = os.path.join(os.getcwd(), '国王排名')
    os.makedirs(pathOne, exist_ok=True)

    for chapter_name, chapter_url in chapters_url:
        print(f"开始- >> {chapter_name} << ")
        pathTwo = os.path.join(pathOne, chapter_name)
        os.makedirs(pathTwo, exist_ok=True)

        response = requests.get(chapter_url, timeout=REQUEST_TIMEOUT)
        print(chapter_url)
        text = response.text

        # Parameters the image API requires, embedded in the chapter page as
        # JavaScript variables.
        cid = _page_var(text, r'var DM5_CID=(.*?);')
        mid = _page_var(text, r'var DM5_MID=(.*?);')
        dt = _page_var(text, r'var DM5_VIEWSIGN_DT="(.*?)";')
        sign = _page_var(text, r'var DM5_VIEWSIGN="(.*?)";')
        page_count = int(_page_var(text, r'var DM5_IMAGE_COUNT=(.*?);'))

        for page in range(1, page_count + 1):
            js_api = (
                f'{chapter_url}chapterfun.ashx?cid={cid}&page={page}&key='
                f'&language=1&gtk=6&_cid={cid}&_mid={mid}&_dt={dt}&_sign={sign}'
            )
            # NOTE(review): the referer value was stripped from the published
            # copy; the site root is assumed -- TODO confirm.
            ret = requests.get(
                js_api, headers={'referer': BASE_URL}, timeout=REQUEST_TIMEOUT
            )
            # SECURITY: this evaluates JavaScript returned by the remote
            # server. Only run this script against a site you trust.
            image_url = execjs.eval(ret.text)
            img_url = image_url[0]
            # Deliberately best-effort: one failed page is reported and
            # skipped instead of aborting the whole chapter.
            try:
                with open(os.path.join(pathTwo, f'{page}.jpg'), 'wb') as f:
                    f.write(
                        requests.get(img_url, timeout=REQUEST_TIMEOUT).content
                    )
                print(f"- {chapter_name} {page}.jpg......")
            except Exception as e:
                print(f'{chapter_name} {page}-失败:{e}')

        jpg_path = glob.glob(os.path.join(pathTwo, '*.jpg'))
        # Files are named "<page>.jpg"; sort numerically so page 10 follows
        # page 9 rather than page 1.
        jpg_path.sort(key=lambda x: int(os.path.basename(x).split('.')[0]))
        pdf_path = f'国王排名 {os.path.basename(pathTwo)}.pdf'
        jpg_to_pdf(jpg_path, pdf_path)


def jpg_to_pdf(jpgs, path):
    """Write the images in ``jpgs`` to a PDF at ``path``, one image per page.

    Every page uses the pixel size of the first image, and each image is
    drawn scaled to that size.

    Args:
        jpgs: Ordered list of image file paths.
        path: Destination path of the PDF file.
    """
    if not jpgs:
        # Nothing was downloaded for this chapter; skip instead of crashing
        # on jpgs[0].
        print(path+' >> 没有可用的图片,跳过生成pdf')
        return

    with Image.open(jpgs[0]) as first:
        w, h = first.size
    ca = canvas.Canvas(path, pagesize=portrait((w, h)))
    for jpg in jpgs:
        ca.drawImage(jpg, 0, 0, w, h)
        ca.showPage()
    ca.save()
    print(path+' >> 已保存至pdf')


def main():
    """Fetch the chapter list, then download and convert every chapter."""
    urls_one = getOne(url)
    getTwo(urls_one)


if __name__ == '__main__':
    main()
箴言:因为这些东西是非常简单的。不要抱怨自己学不会,那是因为你没有足够用心。
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
发表评论
暂时没有评论,来抢沙发吧~