大家好,欢迎来到IT知识分享网。
荔枝直播(分享页):
分享页主页直播列表:(抓包分析)
- 接口:https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=1
- 支持获取n页数据,注意请求间隔;
- get请求,获取分享页必须的 liveId 字段
直播分享页:
- html:https://appweb.lizhi.fm/live/share?liveId=5190925580233002038&njId=2552360964061657132&duserId=138542e7ea551a918c42396e0488695b&from=iosBrowser
- 中间两个参数非必须
- get请求,获取 userId 及 liveUrl 字段
- 该页面请求时需设置移动端User-Agent
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
编码实现:
import json
import re
import time

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10


def _fetch_text(url, headers=None):
    """Return the body of a GET request decoded as UTF-8, or None on failure.

    Network errors are printed and reported as None so that one failed
    request does not abort the whole crawl.
    """
    # Imported lazily: only this helper needs the third-party `requests`
    # package, so the pure parsing helpers stay importable without it.
    import requests

    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(e)
        return None
    response.encoding = 'utf-8'
    return response.text


class LiveList(object):
    """Collects live-room records from the share-page live-list endpoint."""

    def __init__(self, pageLimit=10):
        """Create a collector that requests at most `pageLimit` pages."""
        self.liveList = []
        self.pageLimit = pageLimit

    def getLiveListPage(self, pageNum=1):
        """Fetch one page of the live list.

        Returns a tuple `(count, records)`. A failed request, a body that is
        not valid JSON, or a payload without `ret.dataList` yields `(0, [])`.
        """
        url = 'https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=' + str(pageNum)
        print(url)
        # Throttle: pause before every list request.
        time.sleep(1)
        text = _fetch_text(url)
        if text is None:
            return (0, [])
        try:
            infos = json.loads(text)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(e)
            return (0, [])
        ret = infos.get('ret') if isinstance(infos, dict) else None
        data_list = ret.get('dataList') if isinstance(ret, dict) else None
        if not isinstance(data_list, list):
            data_list = []
        return (len(data_list), data_list)

    def getLiveListPages(self):
        """Fetch pages 1..pageLimit, stopping early at the first empty page.

        Records are appended to `self.liveList`, which is also returned.
        """
        cur_page = 1
        cur_len = 10
        while cur_len > 0:
            if cur_page > self.pageLimit:
                break
            page_len, page_liveList = self.getLiveListPage(cur_page)
            cur_len = page_len
            self.liveList.extend(page_liveList)
            cur_page = cur_page + 1
        return self.liveList


def parseRegular(param="liveUrl"):
    """Compile a regex capturing the value of `<param> = "<value>";`."""
    liveUrl_r = param + r' = \"([^;]*)\";'
    liveUrl_b = re.compile(liveUrl_r, re.DOTALL)
    return liveUrl_b


def parseShareURL(liveId):
    """Fetch the share page of a live room and extract its stream info.

    Returns `{"liveUrl": ..., "userId": ...}` when the page contains a
    `liveUrl` assignment, otherwise None (also when the request fails).
    `userId` falls back to an empty string when it is not present on the page.
    """
    # str() so that a numeric liveId does not break the concatenation.
    url1 = "https://appweb.lizhi.fm/live/share?liveId=" + str(liveId) + "&from=iosBrowser"
    # The share page is requested with a mobile User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    }
    html = _fetch_text(url1, headers=headers)
    # Throttle: pause after every share-page request.
    time.sleep(1)
    if html is None:
        return None
    liveUrl = parseRegular('liveUrl').findall(html)
    userId = parseRegular('userId').findall(html)
    if not liveUrl:
        return None
    return {"liveUrl": liveUrl[0], "userId": userId[0] if userId else ''}


if __name__ == '__main__':
    # Suggestion: refresh roughly once per hour.
    # Fetch 1 page of data.
    liveListObj = LiveList(1)
    liveList = liveListObj.getLiveListPages()
    # Print how many live records were fetched.
    print('liveList count = ', len(liveList))
    # Accumulates the merged records.
    final_res = []
    # Walk through every live record.
    for liveItem in liveList:
        print(liveItem['liveId'])
        # Parse the share page of the current live room.
        userInfo = parseShareURL(liveItem['liveId'])
        if userInfo is None:
            print("直播已结束!")
        else:
            print('直播中...')
            # Merge the list record with the share-page fields into one dict
            # (a set literal of dicts would raise TypeError: unhashable type).
            final_item = {**liveItem, **userInfo}
            final_res.append(final_item)
    # Print the final result.
    print(len(final_res), final_res)
    # Persist the result as local JSON.
    with open('liveList.json', 'w', encoding='utf-8') as file_obj:
        json.dump(final_res, file_obj)
    print('over!!!')
Print:
https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=1 liveList count = 10 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 10 [{
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2020/09/21/.jpg', 'liveStatus': 1, 'totalCount': '25309', 'liveName': '大凯故事会', 'userName': '大凯说', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2017/05/22/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '', 'liveUrl': 'http://pull102.gzlz307.com/home/6d944f6ab72b3d0a23c39/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm//studio/2021/07/14/.jpg', 'liveStatus': 1, 'totalCount': '3154', 'liveName': '日常的午后尬聊', 'userName': '搞事儿ღ 养声糖', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/07/26/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '脱口秀', 'liveUrl': 'http://pull102.gzlz307.com/home/94aa9abda928e2418e5a76/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2020/10/19/.jpg', 'liveStatus': 1, 'totalCount': '10128', 'liveName': '性感男神在线直播', 'userName': '王帅帅☀幸好有你', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/04/25/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '', 'liveUrl': 'http://pull102.gzlz307.com/home/aef22384abe68f32729d855ee12b99bc/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/07/05/.jpg', 'liveStatus': 1, 'totalCount': '11968', 'liveName': '百变老舅正在直播', 'userName': '老舅⁹ 招主播', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/07/05/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '脱口秀', 'liveUrl': 'http://pull102.gzlz307.com/home/2b5aa92a75c9a6b76fafebd6e38d218d/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/01/04/00.jpg', 'liveStatus': 1, 'totalCount': '2794', 'liveName': '夺宝、塔罗牌好运直播间❤️', 'userName': '墨子轩🍄招主播', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/12/02/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '古风', 'liveUrl': 'http://pull102.gzlz307.com/home/02e57ae2959d3a3104caf/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/03/16/.jpg', 'liveStatus': 1, 'totalCount': '1279', 'liveName': '今天也是圆气满满的一天鸭', 'userName': '٩🥳۶圆气满满鸭~🥀', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/05/25/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '', 'liveUrl': 'http://pull102.gzlz307.com/home/6c9eec0f07b9d9c31ba9a63a090e42ad/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/06/18/.jpg', 'liveStatus': 1, 'totalCount': '', 'liveName': '恋行男友', 'userName': '恋行-高福利招人💝', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/05/21/0_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '男友', 'liveUrl': 'http://pull102.gzlz307.com/home/3ceb87599a87bc1e7152ad4419faf21b/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm//studio/2021/07/07/0.jpg', 'liveStatus': 1, 'totalCount': '1414', 'liveName': '甜妹求带飞上星星✨', 'userName': 'dy.奶糖_Jenny', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/07/07/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '情感', 'liveUrl': 'http://pull102.gzlz307.com/home/1c158cec050b10c5545fef19be825eed/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/07/20/.jpg', 'liveStatus': 1, 'totalCount': '24076', 'liveName': '♬.星辰音乐电台 --温暖治愈', 'userName': '星辰✨冠名LuLu👑', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/09/14/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '连线', 'liveUrl': 'http://pull102.gzlz307.com/home/160aaf1435dd8720a197c69e/playlist.m3u8?only-audio=1', 'userId': ''}, {
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/05/24/0.jpg', 'liveStatus': 1, 'totalCount': '237', 'liveName': '🍷别拿豆包 不当干粮🍷', 'userName': 'DJ安哥🍷天籁', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/08/31/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '情感', 'liveUrl': 'http://pull102.gzlz307.com/home/c45cbdb813f2a0521b3a4d077033b373/playlist.m3u8?only-audio=1', 'userId': '0'}] over!!!
得到记录信息:
{
'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/05/24/0.jpg', 'liveStatus': 1, 'totalCount': '237', 'liveName': '🍷别拿豆包 不当干粮🍷', 'userName': 'DJ安哥🍷天籁', 'liveId': '', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/08/31/_80x80.jpg', 'liveStartTime': 00, 'liveEndTime': 00, 'tag': '情感', 'liveUrl': 'http://pull102.gzlz307.com/home/c45cbdb813f2a0521b3a4d077033b373/playlist.m3u8?only-audio=1', 'userId': '0'}
多进程的优化
import concurrent.futures

# Number of worker processes used by the demo pool.
group_max_workers = 5


def evaluate_item(tests):
    """Return the given item unchanged (placeholder for real per-item work)."""
    return tests


# The guard keeps worker processes from re-running the pool setup when they
# import this module (required with the "spawn" start method).
if __name__ == '__main__':
    item_x_list = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=group_max_workers) as executor:
        # Ten work items: [1, 2, 3, 4, 5], [2, 2, 3, 4, 5], ..., [10, 2, 3, 4, 5].
        results = executor.map(evaluate_item, [[i, 2, 3, 4, 5] for i in range(1, 11)])
        # map() yields results in input order; flatten them into one list.
        for result in results:
            item_x_list.extend(result)
    print(item_x_list)
    # [1, 2, 3, 4, 5, 2, 2, 3, 4, 5, 3, 2, 3, 4, 5, 4, 2, 3, 4, 5, 5, 2, 3, 4, 5, 6, 2, 3, 4, 5, 7, 2, 3, 4, 5, 8, 2, 3, 4, 5, 9, 2, 3, 4, 5, 10, 2, 3, 4, 5]
import concurrent.futures
import json
import re
import time

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10


def _fetch_text(url, headers=None):
    """Return the body of a GET request decoded as UTF-8, or None on failure.

    Network errors are printed and reported as None so that one failed
    request does not abort the whole crawl (or the whole worker pool).
    """
    # Imported lazily: only this helper needs the third-party `requests`
    # package, so the pure parsing helpers stay importable without it.
    import requests

    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(e)
        return None
    response.encoding = 'utf-8'
    return response.text


class LiveList(object):
    """Collects live-room records from the share-page live-list endpoint."""

    def __init__(self, pageLimit=10):
        """Create a collector that requests at most `pageLimit` pages."""
        self.liveList = []
        self.pageLimit = pageLimit

    def getLiveListPage(self, pageNum=1):
        """Fetch one page of the live list.

        Returns a tuple `(count, records)`. A failed request, a body that is
        not valid JSON, or a payload without `ret.dataList` yields `(0, [])`.
        """
        url = 'https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=' + str(pageNum)
        print(url)
        # Throttle: pause before every list request.
        time.sleep(1)
        text = _fetch_text(url)
        if text is None:
            return (0, [])
        try:
            infos = json.loads(text)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(e)
            return (0, [])
        ret = infos.get('ret') if isinstance(infos, dict) else None
        data_list = ret.get('dataList') if isinstance(ret, dict) else None
        if not isinstance(data_list, list):
            data_list = []
        return (len(data_list), data_list)

    def getLiveListPages(self):
        """Fetch pages 1..pageLimit, stopping early at the first empty page.

        Records are appended to `self.liveList`, which is also returned.
        """
        cur_page = 1
        cur_len = 10
        while cur_len > 0:
            if cur_page > self.pageLimit:
                break
            page_len, page_liveList = self.getLiveListPage(cur_page)
            cur_len = page_len
            self.liveList.extend(page_liveList)
            cur_page = cur_page + 1
        return self.liveList


def parseRegular(param="liveUrl"):
    """Compile a regex capturing the value of `<param> = "<value>";`."""
    liveUrl_r = param + r' = \"([^;]*)\";'
    liveUrl_b = re.compile(liveUrl_r, re.DOTALL)
    return liveUrl_b


def parseShareURL(liveId):
    """Fetch the share page of a live room and extract its stream info.

    Returns `{"liveUrl": ..., "userId": ...}` when the page contains a
    `liveUrl` assignment, otherwise None (also when the request fails).
    `userId` falls back to an empty string when it is not present on the page.
    """
    # str() so that a numeric liveId does not break the concatenation.
    url1 = "https://appweb.lizhi.fm/live/share?liveId=" + str(liveId) + "&from=iosBrowser"
    # The share page is requested with a mobile User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    }
    # No delay after the request in this variant.
    html = _fetch_text(url1, headers=headers)
    if html is None:
        return None
    liveUrl = parseRegular('liveUrl').findall(html)
    userId = parseRegular('userId').findall(html)
    if not liveUrl:
        return None
    return {"liveUrl": liveUrl[0], "userId": userId[0] if userId else ''}


def dealLiveItemForUserInfo(liveItem):
    """Resolve one live record into a merged record, or None if unavailable.

    The result contains every key of `liveItem` plus the `liveUrl`/`userId`
    fields parsed from its share page.
    """
    userInfo = parseShareURL(liveItem['liveId'])
    if userInfo is None:
        print("直播已结束!")
        return None
    print('直播中...')
    # Merge both dicts (a set literal of dicts would raise
    # TypeError: unhashable type).
    return {**liveItem, **userInfo}


def dealLiveListAsFinalResWithWokers(liveList, max_works=5):
    """Resolve all live records in parallel and return the successful ones.

    Uses `max_works` worker processes (5 by default) to parse the share
    pages; records whose share page yields no stream info are dropped.
    """
    # Nothing to do: avoid creating a process pool for an empty list.
    if not liveList:
        return []
    # NOTE(review): the work is I/O-bound, so a ThreadPoolExecutor would
    # likely serve as well; processes are kept to match the original design.
    with concurrent.futures.ProcessPoolExecutor(max_workers=max_works) as executor:
        results = executor.map(dealLiveItemForUserInfo, liveList)
        return [result for result in results if result is not None]


def saveFinaleResJsonToLocalPath(final_res, l_path='liveList.json'):
    """Write `final_res` as JSON to `l_path`."""
    # Honor the caller-supplied path instead of a hard-coded file name.
    with open(l_path, 'w', encoding='utf-8') as file_obj:
        json.dump(final_res, file_obj)


if __name__ == '__main__':
    pageLimit = 1
    liveListObj = LiveList(pageLimit)
    liveList = liveListObj.getLiveListPages()
    final_res = dealLiveListAsFinalResWithWokers(liveList, max_works=5)
    saveFinaleResJsonToLocalPath(final_res, 'liveList.json')
    print('共请求 ' + str(pageLimit) + '页数据\n', '共获取 ' + str(len(liveList)) + '条主播记录\n', '共保存 ' + str(len(final_res)) + '条有效记录\n', 'over!!!')
Log:
https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=1 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 直播中... 共请求 1页数据 共获取 10条主播记录 共保存 10条有效记录 over!!!
免责声明:本站所有文章内容,图片,视频等均是来源于用户投稿和互联网及文摘转载整编而成,不代表本站观点,不承担相关法律责任。其著作权各归其原作者或其出版社所有。如发现本站有涉嫌抄袭侵权/违法违规的内容,侵犯到您的权益,请在线联系站长,一经查实,本站将立刻删除。 本文来自网络,若有侵权,请联系删除,如若转载,请注明出处:https://haidsoft.com/156233.html