偷乐短剧.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Spider for the 偷乐短剧 short-drama site
import sys
import json
import re
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup

# Import the base class
sys.path.append('../../')
try:
    from base.spider import Spider
except ImportError:
    # Stand-in base class for local debugging
    class Spider:
        def init(self, extend=""):
            pass


class Spider(Spider):
    def __init__(self):
        # Site root URL
        self.siteUrl = "https://www.toule.top"
        # Matching the site's actual structure, category links use the format:
        # /index.php/vod/show/class/<category-name>/id/1.html
        # Category ID map - categories extracted from the site
        self.cateManual = {
            "男频": "/index.php/vod/show/class/%E7%94%B7%E9%A2%91/id/1.html",
            "女频": "/index.php/vod/show/class/%E5%A5%B3%E9%A2%91/id/1.html",
            "都市": "/index.php/vod/show/class/%E9%83%BD%E5%B8%82/id/1.html",
            "赘婿": "/index.php/vod/show/class/%E8%B5%98%E5%A9%BF/id/1.html",
            "战神": "/index.php/vod/show/class/%E6%88%98%E7%A5%9E/id/1.html",
            "古代言情": "/index.php/vod/show/class/%E5%8F%A4%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
            "现代言情": "/index.php/vod/show/class/%E7%8E%B0%E4%BB%A3%E8%A8%80%E6%83%85/id/1.html",
            "历史": "/index.php/vod/show/class/%E5%8E%86%E5%8F%B2/id/1.html",
            "玄幻": "/index.php/vod/show/class/%E7%8E%84%E5%B9%BB/id/1.html",
            "搞笑": "/index.php/vod/show/class/%E6%90%9E%E7%AC%91/id/1.html",
            "甜宠": "/index.php/vod/show/class/%E7%94%9C%E5%AE%A0/id/1.html",
            "励志": "/index.php/vod/show/class/%E5%8A%B1%E5%BF%97/id/1.html",
            "逆袭": "/index.php/vod/show/class/%E9%80%86%E8%A2%AD/id/1.html",
            "穿越": "/index.php/vod/show/class/%E7%A9%BF%E8%B6%8A/id/1.html",
            "古装": "/index.php/vod/show/class/%E5%8F%A4%E8%A3%85/id/1.html"
        }
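        # Example: the "class" path segment is just the URL-encoded category name:
        #   urllib.parse.unquote("%E7%94%B7%E9%A2%91") == "男频"
        #   urllib.parse.quote("古装") == "%E5%8F%A4%E8%A3%85"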
        # Request headers
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Referer": "https://www.toule.top/",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }
        # In-memory cache
        self.cache = {}
        self.cache_timeout = {}

    def getName(self):
        return "偷乐短剧"

    def init(self, extend=""):
        # Initialization hook; nothing to do here
        return

    def isVideoFormat(self, url):
        """Return True if the URL looks like a direct video resource."""
        video_formats = ['.mp4', '.m3u8', '.ts', '.flv', '.avi', '.mkv', '.mov', '.rmvb', '.3gp']
        url = url.lower()
        for fmt in video_formats:
            if fmt in url:
                return True
        return False
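    # Examples (direct consequences of the substring check above):
    #   isVideoFormat("https://cdn.example.com/v/abc.m3u8?sign=x")    -> True
    #   isVideoFormat("/index.php/vod/play/id/9024/sid/1/nid/1.html") -> False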
    def manualVideoCheck(self):
        """Whether videos need a manual check."""
        return False

    # Helper - network requests
    def fetch(self, url, headers=None, data=None, method="GET"):
        """Unified network request helper."""
        try:
            if headers is None:
                headers = self.headers.copy()
            if method.upper() == "GET":
                response = requests.get(url, headers=headers, params=data, timeout=10, verify=False)
            else:  # POST
                response = requests.post(url, headers=headers, data=data, timeout=10, verify=False)
            response.raise_for_status()
            response.encoding = response.apparent_encoding or 'utf-8'
            return response
        except Exception as e:
            self.log(f"请求失败: {url}, 错误: {str(e)}", "ERROR")
            return None

    # Cache helpers
    def getCache(self, key, timeout=3600):
        """Return a cached value, or None if missing or expired."""
        if key in self.cache and key in self.cache_timeout:
            if time.time() < self.cache_timeout[key]:
                return self.cache[key]
            else:
                del self.cache[key]
                del self.cache_timeout[key]
        return None

    def setCache(self, key, value, timeout=3600):
        """Store a value in the cache with an expiry time."""
        self.cache[key] = value
        self.cache_timeout[key] = time.time() + timeout

    # Logging helper
    def log(self, msg, level='INFO'):
        """Log a message if its level is at or above the configured level."""
        levels = {
            'DEBUG': 0,
            'INFO': 1,
            'WARNING': 2,
            'ERROR': 3
        }
        current_level = 'INFO'  # set to DEBUG for more verbose output
        if levels.get(level, 4) >= levels.get(current_level, 1):
            print(f"[{level}] {time.strftime('%Y-%m-%d %H:%M:%S')} - {msg}")

    # Helper - extract the video ID from a URL
    def extractVodId(self, url):
        """Extract the numeric video ID from a URL."""
        # Play-page paths look like: /index.php/vod/play/id/9024/sid/1/nid/1.html
        # (no trailing slash required, so detail-style paths like /id/9024.html also match)
        match = re.search(r'/id/(\d+)', url)
        if match:
            return match.group(1)
        return ""
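    # Examples:
    #   extractVodId("/index.php/vod/play/id/9024/sid/1/nid/1.html") -> "9024"
    #   extractVodId("/index.php/vod/detail/id/9024.html")           -> "9024"
    #   extractVodId("/index.php/art/detail.html")                   -> ""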
    # Helper - extract categories from page text
    def extractCategories(self, text):
        """Extract category tags from a comma-separated tag string."""
        cats = []
        # Tag strings look like: "男频,逆袭,亲情,短剧"
        if "," in text:
            parts = text.split(",")
            for part in parts:
                part = part.strip()
                if part and part != "短剧":
                    cats.append(part)
        return cats
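    # Example: extractCategories("男频,逆袭,亲情,短剧") -> ["男频", "逆袭", "亲情"]
    # (the generic "短剧" tag is dropped)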
    # Main interface implementations
    def homeContent(self, filter):
        """Return home-page categories and recommended videos."""
        result = {}
        classes = []
        # Try the cache first
        cache_key = 'home_classes'
        cached_classes = self.getCache(cache_key)
        if cached_classes:
            classes = cached_classes
        else:
            # Use the predefined categories
            for k, v in self.cateManual.items():
                classes.append({
                    'type_id': v,  # the full URL path serves as the type_id
                    'type_name': k
                })
            # Save to cache
            self.setCache(cache_key, classes, 24 * 3600)  # cache for 24 hours
        result['class'] = classes
        # Fetch the home-page recommendations
        videos = self.homeVideoContent().get('list', [])
        result['list'] = videos
        return result
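    # The returned structure follows the usual TVBox spider convention, e.g.:
    #   {"class": [{"type_id": "/index.php/vod/show/class/%E7%94%B7%E9%A2%91/id/1.html",
    #               "type_name": "男频"}, ...],
    #    "list":  [{"vod_id": "9024", "vod_name": "...", "vod_pic": "...", "vod_remarks": "..."}, ...]}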
    def homeVideoContent(self):
        """Return the recommended videos from the home page."""
        result = {'list': []}
        videos = []
        # Try the cache first
        cache_key = 'home_videos'
        cached_videos = self.getCache(cache_key)
        if cached_videos:
            return {'list': cached_videos}
        try:
            response = self.fetch(self.siteUrl)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')
                # Locate the "最新更新" (latest updates) section
                latest_section = soup.find('h2', string=lambda t: t and '最新更新' in t)
                if latest_section:
                    container = latest_section.parent  # its containing element
                    if container:
                        # Collect all li.item elements
                        items = container.find_all('li', class_='item')
                        for item in items:
                            try:
                                # Title
                                title_link = item.find('h3')
                                if not title_link:
                                    continue
                                title = title_link.text.strip()
                                # First link is the detail-page link
                                link_tag = item.find('a')
                                if not link_tag:
                                    continue
                                link = link_tag.get('href', '')
                                if not link.startswith('http'):
                                    link = urllib.parse.urljoin(self.siteUrl, link)
                                # Extract the ID
                                vid = self.extractVodId(link)
                                if not vid:
                                    continue
                                # Poster image
                                img_tag = item.find('img')
                                img_url = ""
                                if img_tag:
                                    img_url = img_tag.get('src', img_tag.get('data-src', ''))
                                    if img_url and not img_url.startswith('http'):
                                        img_url = urllib.parse.urljoin(self.siteUrl, img_url)
                                # Remarks
                                remarks = ""
                                remarks_tag = item.find('span', class_='remarks')
                                if remarks_tag:
                                    remarks = remarks_tag.text.strip()
                                # Tags
                                tags = ""
                                tags_tag = item.find('span', class_='tags')
                                if tags_tag:
                                    tags = tags_tag.text.strip()
                                # Merge remarks and tags
                                if remarks and tags:
                                    remarks = f"{remarks} | {tags}"
                                elif tags:
                                    remarks = tags
                                # Build the video entry
                                videos.append({
                                    'vod_id': vid,
                                    'vod_name': title,
                                    'vod_pic': img_url,
                                    'vod_remarks': remarks
                                })
                            except Exception as e:
                                self.log(f"处理视频项时出错: {str(e)}", "ERROR")
                                continue
                # Save to cache
                self.setCache(cache_key, videos, 3600)  # cache for 1 hour
        except Exception as e:
            self.log(f"获取首页视频内容发生错误: {str(e)}", "ERROR")
        result['list'] = videos
        return result
    def categoryContent(self, tid, pg, filter, extend):
        """Return the video list for a category."""
        result = {}
        videos = []
        # Pagination defaults (also used if the request fails)
        total = 0
        pagecount = 1
        limit = 20
        # Normalize the page number
        if pg is None:
            pg = 1
        else:
            pg = int(pg)
        # Build the category URL - tid is the full URL path
        if tid.startswith("/"):
            # Insert the page number; URLs look like: /index.php/vod/show/class/男频/id/1.html
            if pg > 1:
                if "html" in tid:
                    category_url = tid.replace(".html", f"/page/{pg}.html")
                else:
                    category_url = f"{tid}/page/{pg}.html"
            else:
                category_url = tid
            full_url = urllib.parse.urljoin(self.siteUrl, category_url)
        else:
            # tid is not a URL path; treat it as a legacy category name and look up its URL
            category_url = ""
            for name, url in self.cateManual.items():
                if name == tid:
                    category_url = url
                    break
            if not category_url:
                self.log(f"未找到分类ID对应的URL: {tid}", "ERROR")
                result['list'] = []
                result['page'] = pg
                result['pagecount'] = 1
                result['limit'] = 0
                result['total'] = 0
                return result
            # Insert the page number
            if pg > 1:
                if "html" in category_url:
                    category_url = category_url.replace(".html", f"/page/{pg}.html")
                else:
                    category_url = f"{category_url}/page/{pg}.html"
            full_url = urllib.parse.urljoin(self.siteUrl, category_url)
        # Request the category page
        try:
            response = self.fetch(full_url)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')
                # Find video items; adjust to the actual HTML structure
                items = soup.find_all('li', class_='item')
                for item in items:
                    try:
                        # Title
                        title_tag = item.find('h3')
                        if not title_tag:
                            continue
                        title = title_tag.text.strip()
                        # Link
                        link_tag = item.find('a')
                        if not link_tag:
                            continue
                        link = link_tag.get('href', '')
                        if not link.startswith('http'):
                            link = urllib.parse.urljoin(self.siteUrl, link)
                        # Extract the ID
                        vid = self.extractVodId(link)
                        if not vid:
                            continue
                        # Poster image
                        img_tag = item.find('img')
                        img_url = ""
                        if img_tag:
                            img_url = img_tag.get('src', img_tag.get('data-src', ''))
                            if img_url and not img_url.startswith('http'):
                                img_url = urllib.parse.urljoin(self.siteUrl, img_url)
                        # Remarks
                        remarks = ""
                        remarks_tag = item.find('span', class_='remarks')
                        if remarks_tag:
                            remarks = remarks_tag.text.strip()
                        # Tags
                        tags = ""
                        tags_tag = item.find('span', class_='tags')
                        if tags_tag:
                            tags = tags_tag.text.strip()
                        # Merge remarks and tags
                        if remarks and tags:
                            remarks = f"{remarks} | {tags}"
                        elif tags:
                            remarks = tags
                        # Build the video entry
                        videos.append({
                            'vod_id': vid,
                            'vod_name': title,
                            'vod_pic': img_url,
                            'vod_remarks': remarks
                        })
                    except Exception as e:
                        self.log(f"处理分类视频项时出错: {str(e)}", "ERROR")
                        continue
                # Pagination info
                total = len(videos)
                # Look for a pagination element
                pagination = soup.find('ul', class_='page')
                if pagination:
                    # Use the largest numeric page link as the page count
                    last_page_links = pagination.find_all('a')
                    for link in last_page_links:
                        page_text = link.text.strip()
                        if page_text.isdigit():
                            pagecount = max(pagecount, int(page_text))
        except Exception as e:
            self.log(f"获取分类内容发生错误: {str(e)}", "ERROR")
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = pagecount
        result['limit'] = limit
        result['total'] = total
        return result
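    # Example paginated category URL produced above (page 2 of 男频):
    #   /index.php/vod/show/class/%E7%94%B7%E9%A2%91/id/1/page/2.html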
    def detailContent(self, ids):
        """Return the detail info for a video."""
        result = {}
        if not ids or len(ids) == 0:
            return result
        # Video ID
        vid = ids[0]
        # Build the play-page URL
        play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"
        try:
            response = self.fetch(play_url)
            if not response or response.status_code != 200:
                return result
            html = response.text
            soup = BeautifulSoup(html, 'html.parser')
            # Basic info
            # Title
            title = ""
            title_tag = soup.find('h1', class_='items-title')
            if title_tag:
                title = title_tag.text.strip()
            # Poster
            pic = ""
            pic_tag = soup.find('img', class_='thumb')
            if pic_tag:
                pic = pic_tag.get('src', '')
                if pic and not pic.startswith('http'):
                    pic = urllib.parse.urljoin(self.siteUrl, pic)
            # Description
            desc = ""
            desc_tag = soup.find('div', class_='text-content')
            if desc_tag:
                desc = desc_tag.text.strip()
            # Tags / categories
            tags = []
            tags_container = soup.find('span', class_='items-tags')
            if tags_container:
                tag_links = tags_container.find_all('a')
                for tag in tag_links:
                    tag_text = tag.text.strip()
                    if tag_text:
                        tags.append(tag_text)
            # Episode list
            play_from = "偷乐短剧"
            play_list = []
            # Locate the episode area
            play_area = soup.find('div', class_='swiper-wrapper')
            if play_area:
                # Collect all episode links
                episode_links = play_area.find_all('a')
                for ep in episode_links:
                    ep_title = ep.text.strip()
                    ep_url = ep.get('href', '')
                    if ep_url:
                        # Use the URL itself as the episode ID
                        if not ep_url.startswith('http'):
                            ep_url = urllib.parse.urljoin(self.siteUrl, ep_url)
                        # Normalize the episode label
                        ep_num = ep_title
                        if ep_num.isdigit():
                            ep_num = f"第{ep_num}集"
                        play_list.append(f"{ep_num}${ep_url}")
            # No episode list found - fall back to the play button
            if not play_list:
                play_btn = soup.find('a', class_='btn-play')
                if play_btn:
                    play_url = play_btn.get('href', '')
                    if play_url:
                        if not play_url.startswith('http'):
                            play_url = urllib.parse.urljoin(self.siteUrl, play_url)
                        play_list.append(f"播放${play_url}")
            # Still nothing - fall back to the play-page URL itself
            if not play_list:
                play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/1.html"
                play_list.append(f"播放${play_url}")
            # Extra info (director, actors, etc.)
            director = ""
            actor = ""
            year = ""
            area = ""
            remarks = ""
            # Scan the meta items
            meta_items = soup.find_all('div', class_='meta-item')
            for item in meta_items:
                item_title = item.find('span', class_='item-title')
                item_content = item.find('span', class_='item-content')
                if item_title and item_content:
                    title_text = item_title.text.strip()
                    content_text = item_content.text.strip()
                    if "导演" in title_text:
                        director = content_text
                    elif "主演" in title_text:
                        actor = content_text
                    elif "年份" in title_text:
                        year = content_text
                    elif "地区" in title_text:
                        area = content_text
                    elif "简介" in title_text:
                        if not desc:
                            desc = content_text
                    elif "状态" in title_text:
                        remarks = content_text
            # Fall back if remarks were not found in the meta items
            if not remarks:
                remarks_tag = soup.find('span', class_='remarks')
                if remarks_tag:
                    remarks = remarks_tag.text.strip()
            # Build the standard data structure
            vod = {
                "vod_id": vid,
                "vod_name": title,
                "vod_pic": pic,
                "vod_year": year,
                "vod_area": area,
                "vod_remarks": remarks,
                "vod_actor": actor,
                "vod_director": director,
                "vod_content": desc,
                "type_name": ",".join(tags),
                "vod_play_from": play_from,
                "vod_play_url": "#".join(play_list)
            }
            result = {
                'list': [vod]
            }
        except Exception as e:
            self.log(f"获取详情内容时出错: {str(e)}", "ERROR")
        return result
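    # vod_play_url encodes episodes as "name$url" pairs joined by "#", e.g.:
    #   第1集$https://www.toule.top/index.php/vod/play/id/9024/sid/1/nid/1.html#第2集$...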
    def searchContent(self, key, quick, pg=1):
        """Search for videos by keyword."""
        result = {}
        videos = []
        # Build the search URL and parameters
        search_url = f"{self.siteUrl}/index.php/vod/search.html"
        params = {"wd": key}
        try:
            response = self.fetch(search_url, data=params)
            if response and response.status_code == 200:
                html = response.text
                soup = BeautifulSoup(html, 'html.parser')
                # Find result items
                search_items = soup.find_all('li', class_='item')
                for item in search_items:
                    try:
                        # Title
                        title_tag = item.find('h3')
                        if not title_tag:
                            continue
                        title = title_tag.text.strip()
                        # Link
                        link_tag = item.find('a')
                        if not link_tag:
                            continue
                        link = link_tag.get('href', '')
                        if not link.startswith('http'):
                            link = urllib.parse.urljoin(self.siteUrl, link)
                        # Extract the video ID
                        vid = self.extractVodId(link)
                        if not vid:
                            continue
                        # Poster image
                        img_tag = item.find('img')
                        img_url = ""
                        if img_tag:
                            img_url = img_tag.get('src', img_tag.get('data-src', ''))
                            if img_url and not img_url.startswith('http'):
                                img_url = urllib.parse.urljoin(self.siteUrl, img_url)
                        # Remarks
                        remarks = ""
                        remarks_tag = item.find('span', class_='remarks')
                        if remarks_tag:
                            remarks = remarks_tag.text.strip()
                        # Tags
                        tags = ""
                        tags_tag = item.find('span', class_='tags')
                        if tags_tag:
                            tags = tags_tag.text.strip()
                        # Merge remarks and tags
                        if remarks and tags:
                            remarks = f"{remarks} | {tags}"
                        elif tags:
                            remarks = tags
                        # Build the video entry
                        videos.append({
                            'vod_id': vid,
                            'vod_name': title,
                            'vod_pic': img_url,
                            'vod_remarks': remarks
                        })
                    except Exception as e:
                        self.log(f"处理搜索结果时出错: {str(e)}", "ERROR")
                        continue
        except Exception as e:
            self.log(f"搜索功能发生错误: {str(e)}", "ERROR")
        result['list'] = videos
        return result
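    # fetch() passes params as the query string on GET, so the request resolves to:
    #   https://www.toule.top/index.php/vod/search.html?wd=<keyword>
    # (pg is accepted for interface compatibility but currently unused)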
    def searchContentPage(self, key, quick, pg=1):
        return self.searchContent(key, quick, pg)
    def playerContent(self, flag, id, vipFlags):
        """Resolve the actual playback info for an episode."""
        result = {}
        try:
            # Already a direct video URL?
            if self.isVideoFormat(id):
                result["parse"] = 0
                result["url"] = id
                result["playUrl"] = ""
                result["header"] = json.dumps(self.headers)
                return result
            # A full page URL?
            if id.startswith(('http://', 'https://')):
                play_url = id
            # A relative path?
            elif id.startswith('/'):
                play_url = urllib.parse.urljoin(self.siteUrl, id)
            # Otherwise assume it is a video ID and build the play-page URL
            else:
                # Check for the "videoID_episode" format
                parts = id.split('_')
                if len(parts) > 1 and parts[0].isdigit():
                    vid = parts[0]
                    nid = parts[1]
                    play_url = f"{self.siteUrl}/index.php/vod/play/id/{vid}/sid/1/nid/{nid}.html"
                else:
                    # Treat it as a bare video ID
                    play_url = f"{self.siteUrl}/index.php/vod/play/id/{id}/sid/1/nid/1.html"
            # Visit the play page to extract the real video address
            try:
                self.log(f"正在解析播放页面: {play_url}")
                response = self.fetch(play_url)
                if response and response.status_code == 200:
                    html = response.text
                    # Look for the player_aaaa variable
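                    # MacCMS-style pages typically embed player JSON like (shape assumed):
                    #   var player_aaaa = {"url": "https://.../play.m3u8", "url_next": "...", ...};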
                    player_match = re.search(r'var\s+player_aaaa\s*=\s*({.*?});', html, re.DOTALL)
                    if player_match:
                        try:
                            player_data = json.loads(player_match.group(1))
                            if 'url' in player_data:
                                video_url = player_data['url']
                                if not video_url.startswith('http'):
                                    video_url = urllib.parse.urljoin(self.siteUrl, video_url)
                                self.log(f"从player_aaaa获取到视频地址: {video_url}")
                                result["parse"] = 0
                                result["url"] = video_url
                                result["playUrl"] = ""
                                result["header"] = json.dumps(self.headers)
                                return result
                        except json.JSONDecodeError as e:
                            self.log(f"解析player_aaaa JSON出错: {str(e)}", "ERROR")
                    # player_aaaa failed - try other extraction strategies
                    # 1. A <video> tag
                    video_match = re.search(r'<video[^>]*src=["\'](.*?)["\']', html)
                    if video_match:
                        video_url = video_match.group(1)
                        if not video_url.startswith('http'):
                            video_url = urllib.parse.urljoin(self.siteUrl, video_url)
                        self.log(f"从video标签找到视频地址: {video_url}")
                        result["parse"] = 0
                        result["url"] = video_url
                        result["playUrl"] = ""
                        result["header"] = json.dumps(self.headers)
                        return result
                    # 2. An <iframe>
                    iframe_match = re.search(r'<iframe[^>]*src=["\'](.*?)["\']', html)
                    if iframe_match:
                        iframe_url = iframe_match.group(1)
                        if not iframe_url.startswith('http'):
                            iframe_url = urllib.parse.urljoin(self.siteUrl, iframe_url)
                        self.log(f"找到iframe,正在解析: {iframe_url}")
                        # Fetch the iframe content
                        iframe_response = self.fetch(iframe_url)
                        if iframe_response and iframe_response.status_code == 200:
                            iframe_html = iframe_response.text
                            # Look for a video address inside the iframe
                            iframe_video_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', iframe_html)
                            if iframe_video_match:
                                video_url = iframe_video_match.group(1)
                                self.log(f"从iframe中找到视频地址: {video_url}")
                                result["parse"] = 0
                                result["url"] = video_url
                                result["playUrl"] = ""
                                result["header"] = json.dumps({
                                    "User-Agent": self.headers["User-Agent"],
                                    "Referer": iframe_url
                                })
                                return result
                    # 3. Any URL that looks like a video file
                    url_match = re.search(r'(https?://[^\'"]+\.(mp4|m3u8|ts))', html)
                    if url_match:
                        video_url = url_match.group(1)
                        self.log(f"找到可能的视频地址: {video_url}")
                        result["parse"] = 0
                        result["url"] = video_url
                        result["playUrl"] = ""
                        result["header"] = json.dumps(self.headers)
                        return result
            except Exception as e:
                self.log(f"解析播放地址时出错: {str(e)}", "ERROR")
            # Everything failed - hand the page URL to an external parser
            self.log("未找到直接可用的视频地址,需要外部解析", "WARNING")
            result["parse"] = 1  # 1 means external parsing is required
            result["url"] = play_url  # return the play-page URL
            result["playUrl"] = ""
            result["header"] = json.dumps(self.headers)
        except Exception as e:
            self.log(f"获取播放内容时出错: {str(e)}", "ERROR")
        return result
    def localProxy(self, param):
        """Local proxy hook (not implemented)."""
        return [404, "text/plain", {}, "Not Found"]
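
# A minimal local smoke test, assuming direct execution with network access to
# www.toule.top; the search keyword is an example only, not part of the spider API.
if __name__ == "__main__":
    spider = Spider()
    spider.init()
    home = spider.homeContent(False)
    print(f"classes: {len(home.get('class', []))}, home videos: {len(home.get('list', []))}")
    results = spider.searchContent("战神", False)
    print(f"search hits: {len(results.get('list', []))}")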