# Fetch the bilibili ranking API (board/v1/ranking/top) and append selected fields of each entry to a CSV file.
import csv

import jsonpath
import requests


# 1. Request module: send the request and hand back the response.
def get_response(url, timeout=10):
    """Fetch *url* with HTTP GET using a browser-like User-Agent.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``requests.Response`` with its encoding forced to UTF-8 when the
        status code is 200. ``None`` when the status code is anything else,
        or when ``requests`` raises an error (the error is printed).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.3'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Connection problems, timeouts, malformed URLs, ...: report and let
        # the caller see "no response" rather than an exception.
        print(e)
        return None

    if response.status_code != 200:
        return None

    # Decode the body as UTF-8 regardless of what the server advertised.
    response.encoding = 'utf-8'
    return response
# 2. Parsing module: pull the fields of interest out of the response.
def parse_response(response):
    """Extract one record per ranking entry from the JSON response.

    Args:
        response: Object exposing ``.json()`` (as returned by
            ``get_response``), or ``None`` when the request failed.

    Returns:
        An iterator of 7-tuples
        ``(uid, name, is_vip, item_id, upload_time, watched_num, video_playurl)``
        built by zipping the per-field lists, so it is as long as the shortest
        of them. The iterator is empty when *response* is ``None`` or when any
        field has no match in the document.
    """
    if response is None:
        # get_response() signals failure with None; there is nothing to parse.
        return zip()

    payload = response.json()

    # JSONPath of every column, in the order of the returned tuple.
    # The item description is not part of the returned rows.
    field_paths = (
        '$.data.items[*].user.uid',            # user id
        '$.data.items[*].user.name',           # user name
        '$.data.items[*].user.is_vip',         # whether the user is a VIP
        '$.data.items[*].item.id',             # video id
        '$.data.items[*].item.upload_time',    # upload time
        '$.data.items[*].item.watched_num',    # view count
        '$.data.items[*].item.video_playurl',  # video URL
    )

    # jsonpath.jsonpath() returns False (not an empty list) when a path has no
    # match; normalise that to [] so zip() does not raise TypeError.
    columns = [jsonpath.jsonpath(payload, path) or [] for path in field_paths]
    return zip(*columns)
# Business logic
def main():
    """Fetch the ranking, parse it, and append every record to a CSV file.

    Each parsed record is printed as a list and appended as one row to
    ``bilibili111.csv`` in the current working directory. Nothing is written
    when the request did not yield a usable response.
    """
    # An AJAX endpoint URL.
    url = 'http://api.vc.bilibili.com/board/v1/ranking/top?page_size=10&next_offset=&tag=%E4%BB%8A%E6%97%A5%E7%83%AD%E9%97%A8&platform=pc'
    response = get_response(url)
    if response is None:
        # get_response() returns None on a request error or non-200 status.
        return

    rows = parse_response(response)

    # 3. Persist to a local CSV file, opened in append mode. newline='' lets
    # the csv module control line endings itself. The context manager closes
    # the file even if writing a row fails.
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used, as before; confirm whether UTF-8 should be forced.
    with open('bilibili111.csv', 'a', newline='') as out:
        csv_write = csv.writer(out, dialect='excel')
        for row in rows:
            record = list(row)
            print(record)
            csv_write.writerow(record)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()