根据豆瓣API获取豆瓣电影数据并存入数据库

网友投稿 1484 2022-10-10

根据豆瓣API获取豆瓣电影数据并存入数据库

根据豆瓣API获取豆瓣电影数据并存入数据库

# encoding: utf-8"""@author: lanxiaofang@contact: fang@lanxf-@software: PyCharm@file: doubanapi.py@time: 2020/4/11 11:34"""import datetimeimport requestsfrom time import sleepimport pymysqlimport randomheader = { 'Accept': 'application / json, text / plain, * / *', 'Accept - Encoding': 'gzip, deflate, br', "Accept - Language": "zh - CN, zh;q=0.9", 'Connection': "keep - alive", 'Referer': ' 'User-Agent': "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01", }conn = pymysql.connect(host='127.0.0.1', user='dbmovie', password='dbmovie', port=3306, db='Dbmovie', charset='utf8')cur = conn.cursor()# start in range(1300000, 27150000, 1): url = ' try: res = requests.get(url, headers=header).json() lr = len(res) print(start, lr, res) if lr < 5: cur.execute("INSERT INTO db_invalidid (invalid_id) VALUES ({});".format(start)) print("无效的id ", start) else: rating = res['rating']['average'] # 平均评分(四舍五入的平均法9.66) 9.7 print(rating) original_title = res['original_title'] # 初始电影名 The Shawshank Redemption movie_image = res['images']['small'] # 电影海报url(small,large,medium are the same) year = ','.join(res['year']) # 播出年份 1994年 # alt = res['alt'] # 电影链接 movie_id = res['id'] # 电影id 1292052 title = res['title'] # 电影名 肖申克的救赎 print(title) languages_list = ','.join(res['languages']) # 电影语言 英语 writers_list = res['writers'] # 编剧 writers_list_len = len(writers_list) # 编剧的人数 writer_id_list = [] # 这部电影中编剧的id列表 for wi in range(writers_list_len): writer_avatars = writers_list[wi]['avatars']['small'] # 编剧照片(small,large,medium are the same ) writer_name_en = writers_list[wi]['name_en'] # 编剧英文名 Frank Darabont writer_name = writers_list[wi]['name'] # 编剧名字 弗兰克·德拉邦特 writer_alt = writers_list[wi]['alt'] # 编剧在豆瓣中的个人主页url writer_id = int(writers_list[wi]['id']) # 编剧在豆瓣中个人id 1047973 writer_id_list.append(writer_id) # 会创建一个编剧的表 我现在在考虑编剧的表和导演、演员是否放在一个表中,然后角色再用一个外键联合, 在电影表中只放这个编剧id的list列表 insert_sql = "INSERT INTO db_writers (writer_id, writer_name_en, writer_name, writer_avatars, writer_alt) VALUES ({},'{}','{}','{}','{}');".format(writer_id, writer_name_en, writer_name, writer_avatars, writer_alt) # print(insert_sql) cur.execute(insert_sql) pubdates_list = ','.join(res['pubdates']) # 首播时间 ['1994-09-10(多伦多电影节)', '1994-10-14(美国)'] tags_list = ','.join(res['tags']) # 电影标签 ['经典', '励志', '信念', '自由', '人性', '人生', '美国', '希望', '剧情', '成长'] durations_list = ','.join(res['durations']) # 电影时长 ['142分钟'] genres_list = ','.join(res['genres']) # 电影类型 ['犯罪', '剧情'] casts_list = res['casts'] # 电影主演 casts_list_len = len(casts_list) # 电影主演人数 cast_id_list = [] for ci in range(casts_list_len): cast_avatars = casts_list[ci]['avatars']['small'] # 主演照片(small,large,medium are the same ) cast_name_en = casts_list[ci]['name_en'] # 主演英文名 Tim Robbins cast_name = casts_list[ci]['name'] # 主演名字 蒂姆·罗宾斯 cast_alt = casts_list[ci]['alt'] # 主演在豆瓣中的个人主页url cast_id = casts_list[ci]['id'] # 主演在豆瓣中个人id 1054521 cast_id_list.append(int(cast_id)) # 这部电影主演id列表 insert_sql = "INSERT INTO db_casts (cast_id, cast_name_en, cast_name, cast_avatars, cast_alt) VALUES ({},'{}','{}','{}','{}');".format(cast_id, cast_name_en, cast_name, cast_avatars, cast_alt) # print(insert_sql) cur.execute(insert_sql) countries_list = ','.join(res['countries']) # 制片国家地区 ['美国'] directors_list = res['directors'] # 导演 directors_list_len = len(directors_list) # 这部电影导演的人数 director_id_list = [] for di in range(directors_list_len): director_avatars = directors_list[di]['avatars']['small'] # 导演照片(small,large,medium are the same ) director_name_en = directors_list[di]['name_en'] # 导演英文名 Frank Darabont director_name = directors_list[di]['name'] # 导演名字 弗兰克·德拉邦特 director_alt = directors_list[di]['alt'] # 导演在豆瓣中的个人主页url director_id = directors_list[di]['id'] # 导演在豆瓣中个人id 1047973 director_id_list.append(int(director_id)) # 这部电影导演id列表 insert_sql = "INSERT INTO db_directors (director_id, director_name_en, director_name, director_avatars, director_alt) VALUES ({},'{}','{}','{}','{}');".format(director_id, director_name_en, director_name, director_avatars, director_alt) # print(insert_sql) cur.execute(insert_sql) ratings_count = res['ratings_count'] # 参与评分的人数(此参数文档中无说明,根据词意翻译的) aka_list = ','.join(res['aka']) # 电影别名 ['月黑高飞(港)', '刺激1995(台)', '地狱诺言', '铁窗岁月', '消香克的救赎'] insert_sql = "INSERT INTO db_movie (movie_id, rating, ratings_count, title, original_title, aka_list, durations_list, year, pubdates_list, movie_image, languages_list, countries_list, writers_list_len, writer_id_list, casts_list_len, cast_id_list, directors_list_len, director_id_list, tags_list, genres_list) VALUES ({},{},{},'{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}');".format(movie_id, rating, ratings_count, title, original_title, aka_list, durations_list, year, pubdates_list, movie_image, languages_list, countries_list, writers_list_len, writer_id_list, casts_list_len, cast_id_list, directors_list_len, director_id_list, tags_list, genres_list) print(insert_sql) cur.execute(insert_sql) except Exception as ex: print(ex) conn.commit() print(datetime.datetime.now()) # sleep(random.uniform(0.5, 1))cur.close()conn.close()

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:Github 微信小程序开发资源大汇总(github是干什么的)
下一篇:微信小程序 电商后台管理系统(微信小程序开发一个多少钱)
相关文章

 发表评论

暂时没有评论,来抢沙发吧~