You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Ru/c/PY1/啪啪视频.py

106 lines
4.4 KiB

# coding=utf-8
import re
from urllib.parse import quote, urljoin

from base.spider import Spider
class Spider(Spider):
    """Site spider for "PaPa视频", built on the project's base ``Spider``.

    Every page is served under ``<host>/6/index.php``; list pages use the
    ``stui-vodlist`` markup, which ``_parse_vod_list`` turns into vod dicts.
    The ``fetch``, ``html`` and ``cleanText`` helpers used below are expected
    to come from the base class (not visible in this file).
    """

    # Path prefix shared by every request this spider makes.
    _BASE_PATH = "/6/index.php"
    # Extracts the numeric video id from hrefs such as
    # "/6/index.php/vod/play/id/116255...".
    _VOD_ID_RE = re.compile(r'id/(\d+)')

    def getName(self):
        """Return the display name of this spider."""
        return "PaPa视频"

    def init(self, extend=""):
        """Set the site host and the default request headers.

        ``extend`` is accepted for interface compatibility and is not used.
        """
        # Consider making this configurable, or make sure this host is correct.
        self.host = "http://202601.duduo.vip"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
            "Referer": self.host
        }

    def _site_url(self, path=""):
        """Build an absolute URL for ``path`` under the site's index.php."""
        return f"{self.host}{self._BASE_PATH}{path}"

    def _fetch_root(self, url):
        """Download ``url`` and return the parsed HTML root of the response."""
        rsp = self.fetch(url, headers=self.headers)
        return self.html(self.cleanText(rsp.text))

    def homeContent(self, filter):
        """Return the fixed category list as ``{'class': [...]}``.

        ``filter`` is accepted for interface compatibility and is not used.
        """
        # Category ids taken from screenshot 1000202100.jpg.
        classes = [
            {'type_name': '国产', 'type_id': '20'},
            {'type_name': '无码', 'type_id': '21'},
            {'type_name': '字幕', 'type_id': '22'},
            {'type_name': '欧美', 'type_id': '23'},
            {'type_name': '三级', 'type_id': '24'},
            {'type_name': '动漫', 'type_id': '25'}
        ]
        return {'class': classes}

    def _parse_vod_list(self, root):
        """Extract vod entries from a page using the ``stui-vodlist`` layout.

        Shared by the home, category and search pages (structure per
        screenshot 1000202101.jpg).

        Args:
            root: parsed HTML tree exposing an lxml-style ``xpath`` method.

        Returns:
            A list of dicts with ``vod_id``, ``vod_name``, ``vod_pic`` and
            ``vod_remarks``. Items missing a title, a link, or a numeric id
            in the link are skipped.
        """
        videos = []
        # Each video card is an <li> whose class contains stui-vodlist__item.
        for item in root.xpath("//li[contains(@class,'stui-vodlist__item')]"):
            # 1. Title: text of the <a> inside the <h4 class="...title...">.
            titles = item.xpath(".//h4[contains(@class,'title')]/a/text()")
            # 2. Link: its href carries the id, e.g.
            #    "/6/index.php/vod/play/id/116255..." -> "116255".
            hrefs = item.xpath(".//h4[contains(@class,'title')]/a/@href")
            if not titles or not hrefs:
                continue
            id_match = self._VOD_ID_RE.search(hrefs[0])
            if id_match is None:
                continue

            # 3. Cover: prefer data-original (lazy-load address), else src.
            pics = item.xpath(".//a[contains(@class,'thumb')]/@data-original")
            if not pics:
                pics = item.xpath(".//a[contains(@class,'thumb')]/@src")
            try:
                pic_url = urljoin(self.host, pics[0]) if pics else ""
            except ValueError:
                # urljoin rejects some malformed URLs; skip such an item
                # instead of aborting the whole list.
                continue

            # 4. Remark / subtitle (e.g. a "click to play" label or duration).
            remarks = item.xpath(".//span[contains(@class,'pic-text')]/text()")
            videos.append({
                "vod_id": id_match.group(1),
                "vod_name": titles[0].strip(),
                "vod_pic": pic_url,
                "vod_remarks": remarks[0].strip() if remarks else ""
            })
        return videos

    def homeVideoContent(self):
        """Return the videos listed on the site's front page as ``{'list': [...]}``."""
        # The home page normally shows the latest updates.
        root = self._fetch_root(self._site_url())
        return {'list': self._parse_vod_list(root)}

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of videos for category ``tid``.

        Args:
            tid: category id (one of the ``type_id`` values from homeContent).
            pg: page number, interpolated into the URL as-is.
            filter, extend: accepted for interface compatibility; unused.

        Returns:
            ``{'list': [...], 'page': pg}``.
        """
        # Link format follows screenshot 1000202100.jpg,
        # e.g. /6/index.php/vod/type/id/20.html, with a page segment added.
        # NOTE(review): no 'pagecount' key is returned; confirm whether the
        # host application needs one to keep paging.
        root = self._fetch_root(self._site_url(f"/vod/type/id/{tid}/page/{pg}.html"))
        return {'list': self._parse_vod_list(root), 'page': pg}

    def detailContent(self, array):
        """Build a minimal detail record for the first id in ``array``.

        No detail page is requested: the play-page URL is constructed
        directly and exposed as a single episode.

        Returns:
            ``{'list': [vod]}``, or ``{'list': []}`` when ``array`` is empty.
        """
        if not array:
            return {'list': []}
        vid = array[0]
        # Apple CMS (MacCMS) sites usually serve the play page at
        # /vod/play/id/{vid}/sid/1/nid/1.html, so build it directly.
        play_url = self._site_url(f"/vod/play/id/{vid}/sid/1/nid/1.html")
        # Real metadata would need another request to the detail page; this
        # keeps it simple and jumps straight to playback.
        vod = {
            "vod_id": vid,
            "vod_name": "视频详情",
            "vod_play_from": "PaPa线路",
            "vod_play_url": f"点击播放${play_url}"
        }
        return {'list': [vod]}

    def searchContent(self, key, quick, pg):
        """Search the site for ``key`` and return page ``pg`` of the results.

        Args:
            key: search keyword; percent-encoded before being placed in the URL.
            quick: accepted for interface compatibility; unused.
            pg: page number, interpolated into the URL as-is.

        Returns:
            ``{'list': [...], 'page': pg}``.
        """
        # The keyword is a path segment here, so encode everything (including
        # "/") to stop reserved characters from changing the URL structure.
        keyword = quote(str(key), safe="")
        root = self._fetch_root(self._site_url(f"/vod/search/page/{pg}/wd/{keyword}.html"))
        return {'list': self._parse_vod_list(root), 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        """Return the playback descriptor for the play-page URL ``id``.

        ``flag`` and ``vipFlags`` are accepted for interface compatibility
        and are not used.
        """
        # Sites like this usually need web sniffing; parse=1 presumably asks
        # the host application to do that - confirm against the host's contract.
        return {
            "parse": 1,
            "url": id,
            "header": self.headers
        }