forked from tfornik/RussiaTools
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
254 lines
9.3 KiB
254 lines
9.3 KiB
# -*- coding: utf-8 -*-
|
|
import re
|
|
import urllib.parse
|
|
import requests
|
|
from base64 import b64encode
|
|
|
|
# 尝试导入 Crypto 库,防止环境缺失报错
|
|
try:
|
|
from Crypto.Cipher import AES
|
|
from Crypto.Util.Padding import unpad
|
|
HAS_CRYPTO = True
|
|
except ImportError:
|
|
HAS_CRYPTO = False
|
|
|
|
class Spider:
|
|
def __init__(self):
|
|
self.name = '玩物社区'
|
|
self.host = 'https://wanwuu.com/'
|
|
self.default_pic = 'https://via.placeholder.com/400x225?text=Video'
|
|
self.headers = {
|
|
'User-Agent': 'Mozilla/5.0 (Linux; Android 13; SM-S901U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Mobile Safari/537.36',
|
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
'Accept-Encoding': 'gzip, deflate',
|
|
'Connection': 'keep-alive',
|
|
'Referer': self.host,
|
|
}
|
|
self.classes = []
|
|
category_str = "国产SM$guochan-sm#日韩SM$rihan-sm#欧美SM$oumei-sm#直播回放$zhibo-huifang#SM小说$novels/new#玩物社区$posts/all"
|
|
for item in category_str.split('#'):
|
|
if '$' in item:
|
|
name, path = item.split('$')
|
|
self.classes.append({"type_name": name, "type_id": path})
|
|
|
|
# --- 集成 demo.py 的 AES 解密逻辑 ---
|
|
def _decrypt_pic(self, img_url):
|
|
"""
|
|
下载并解密加密图片,返回 base64 格式直接显示
|
|
"""
|
|
if not HAS_CRYPTO:
|
|
return img_url # 如果没有加密库,直接返回原链接
|
|
|
|
try:
|
|
# demo.py 中的特定 headers
|
|
img_headers = {
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.7390.55 Safari/537.36',
|
|
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="141", "Google Chrome";v="141"',
|
|
'Origin': 'https://xgne8.dyxobic.cc',
|
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
}
|
|
|
|
# 下载图片二进制数据
|
|
response = requests.get(img_url, headers=img_headers, timeout=5)
|
|
if response.status_code != 200:
|
|
return self.default_pic
|
|
|
|
# AES 解密配置
|
|
key = b"f5d965df75336270"
|
|
iv = b'97b60394abc2fbe1'
|
|
cipher = AES.new(key, AES.MODE_CBC, iv)
|
|
|
|
# 解密并去除填充
|
|
pt = unpad(cipher.decrypt(response.content), AES.block_size)
|
|
|
|
# 转 base64 字符串
|
|
b64_code = b64encode(pt).decode()
|
|
|
|
# 拼接成 HTML 可识别的 data URI
|
|
return f"data:image/jpeg;base64,{b64_code}"
|
|
except Exception as e:
|
|
print(f"解密图片失败: {e}")
|
|
return self.default_pic
|
|
|
|
# 框架接口
|
|
def getDependence(self): return []
|
|
def init(self, extend=""): pass
|
|
def isVideoFormat(self, url): return False
|
|
def manualVideoCheck(self): pass
|
|
def getName(self): return self.name
|
|
def homeContent(self, filter): return {"class": self.classes}
|
|
def homeVideoContent(self): return self.categoryContent("guochan-sm", "1", False, {})
|
|
|
|
# 列表解析
|
|
def _parse_videos(self, html):
|
|
"""提取视频列表,优先使用 data-src 懒加载图片"""
|
|
videos = []
|
|
pattern = r'<div class="video-item"[^>]*>(.*?)</div>\s*</div>'
|
|
|
|
for block in re.findall(pattern, html, re.S):
|
|
# 链接
|
|
href_match = re.search(r'href="([^"]+)"', block)
|
|
if not href_match or '/videos/' not in href_match.group(1):
|
|
continue
|
|
href = href_match.group(1)
|
|
|
|
# 标题
|
|
title = ""
|
|
for t_pattern in [r'alt="([^"]+)"', r'title="([^"]+)"', r'<a[^>]*>\s*([^<]+)\s*</a>']:
|
|
t_match = re.search(t_pattern, block)
|
|
if t_match:
|
|
title = t_match.group(1).strip()
|
|
break
|
|
|
|
if not title:
|
|
continue
|
|
|
|
# 图片 - 优先懒加载属性
|
|
pic = ""
|
|
for p_pattern in [r'data-src="([^"]+)"', r'data-lazy-src="([^"]+)"', r'lazy-src="([^"]+)"', r'src="([^"]+)"']:
|
|
p_match = re.search(p_pattern, block)
|
|
if p_match:
|
|
pic_url = p_match.group(1)
|
|
if pic_url and 'blob:' not in pic_url and 'poster_loading' not in pic_url:
|
|
pic = pic_url
|
|
break
|
|
|
|
# --- 处理解密逻辑 ---
|
|
pic = self._abs(pic)
|
|
if 'rulbbz.cn' in pic: # 识别到加密图床域名
|
|
pic = self._decrypt_pic(pic)
|
|
# ------------------
|
|
|
|
# 时长
|
|
remark = ""
|
|
r_match = re.search(r'>(\d{1,2}:\d{2}(?::\d{2})?)<', block)
|
|
if r_match:
|
|
remark = r_match.group(1)
|
|
|
|
videos.append({
|
|
"vod_id": self._abs(href),
|
|
"vod_name": self.clean_title(title),
|
|
"vod_pic": pic if pic else self.default_pic,
|
|
"vod_remarks": remark
|
|
})
|
|
|
|
return videos
|
|
|
|
# 分类
|
|
def categoryContent(self, tid, pg, filter, extend):
|
|
try:
|
|
pg = int(pg)
|
|
if tid in ("novels/new", "posts/all"):
|
|
url = f"{self.host}{tid}/page/{pg}/" if pg > 1 else f"{self.host}{tid}/"
|
|
else:
|
|
url = f"{self.host}videos/{tid}/page/{pg}/" if pg > 1 else f"{self.host}videos/{tid}/"
|
|
|
|
r = requests.get(url, headers=self.headers, timeout=10)
|
|
r.encoding = 'utf-8'
|
|
videos = self._parse_videos(r.text)
|
|
return self._page(videos, pg)
|
|
except Exception as e:
|
|
print(f"分类失败: {e}")
|
|
return self._page([], pg)
|
|
|
|
# 搜索
|
|
def searchContent(self, key, quick, pg='1'):
|
|
try:
|
|
pg = int(pg)
|
|
wd = urllib.parse.quote(key)
|
|
url = f"{self.host}videos/search/{wd}/page/{pg}/" if pg > 1 else f"{self.host}videos/search/{wd}/"
|
|
|
|
r = requests.get(url, headers=self.headers, timeout=10)
|
|
r.encoding = 'utf-8'
|
|
videos = self._parse_videos(r.text)
|
|
return self._page(videos, pg)
|
|
except Exception as e:
|
|
print(f"搜索失败: {e}")
|
|
return self._page([], pg)
|
|
|
|
# 详情
|
|
def detailContent(self, array):
|
|
vid = array[0] if array[0].startswith('http') else self._abs(array[0])
|
|
try:
|
|
r = requests.get(vid, headers=self.headers, timeout=10)
|
|
r.encoding = 'utf-8'
|
|
html = r.text
|
|
|
|
title = ""
|
|
title_match = re.search(r'<title>(.*?)</title>', html, re.I)
|
|
if title_match:
|
|
title = re.split(r'[-—_]', title_match.group(1))[0].strip()
|
|
|
|
pic = ""
|
|
for pic_pattern in [
|
|
r'<meta[^>]*property="og:image"[^>]*content="([^"]+)"',
|
|
r'<video[^>]*poster="([^"]+)"',
|
|
r'data-poster="([^"]+)"'
|
|
]:
|
|
pic_match = re.search(pic_pattern, html, re.I)
|
|
if pic_match and 'blob:' not in pic_match.group(1):
|
|
pic = pic_match.group(1)
|
|
break
|
|
|
|
# --- 处理解密逻辑 ---
|
|
pic = self._abs(pic)
|
|
if 'rulbbz.cn' in pic:
|
|
pic = self._decrypt_pic(pic)
|
|
# ------------------
|
|
|
|
desc = ""
|
|
desc_match = re.search(r'<meta[^>]*name="description"[^>]*content="([^"]*)"', html, re.I)
|
|
if desc_match:
|
|
desc = desc_match.group(1)
|
|
|
|
play_url = f"video://{vid}"
|
|
|
|
vod = {
|
|
"vod_id": vid,
|
|
"vod_name": self.clean_title(title) if title else "视频",
|
|
"vod_pic": pic if pic else self.default_pic,
|
|
"vod_content": self.clean_title(desc) if desc else title,
|
|
"vod_play_from": self.name,
|
|
"vod_play_url": f"{self.clean_title(title) if title else '播放'}${play_url}"
|
|
}
|
|
return {"list": [vod]}
|
|
except Exception as e:
|
|
print(f"详情失败: {e}")
|
|
return {"list": []}
|
|
|
|
def playerContent(self, flag, id, vipFlags):
|
|
return {
|
|
"parse": 0,
|
|
"playUrl": "",
|
|
"url": id,
|
|
"header": self.headers
|
|
}
|
|
|
|
def _abs(self, url):
|
|
if not url or url.startswith('blob:'):
|
|
return ""
|
|
if url.startswith('//'):
|
|
return 'https:' + url
|
|
return url if url.startswith('http') else urllib.parse.urljoin(self.host, url)
|
|
|
|
def _page(self, videos, pg):
|
|
return {
|
|
"list": videos,
|
|
"page": int(pg),
|
|
"pagecount": 9999,
|
|
"limit": 30,
|
|
"total": 999999
|
|
}
|
|
|
|
def clean_title(self, title):
|
|
if not title: return ""
|
|
title = re.sub(r'<[^>]+>', '', title)
|
|
title = re.sub(r'\s+', ' ', title)
|
|
return title.strip()
|
|
|
|
config = {"player": {}, "filter": {}}
|
|
header = property(lambda self: self.headers)
|
|
|
|
def localProxy(self, param):
|
|
return [] |