forked from tfornik/RussiaTools
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
261 lines
9.5 KiB
261 lines
9.5 KiB
# -*- coding: utf-8 -*-
|
|
#恰逢
|
|
import re
|
|
import urllib.parse
|
|
from base.spider import Spider as BaseSpile
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
class VideoDecryptor:
    """Decode the site's XOR-128 obfuscated strings."""

    @staticmethod
    def decrypt(text: str) -> str:
        """XOR every character of *text* with 0x80 and return the result.

        The transform is its own inverse (decrypt(decrypt(s)) == s).
        Returns "" for falsy input. Non-string input (e.g. a list-valued
        attribute from BeautifulSoup) is returned unchanged — narrowed
        from a bare ``except:``; only ``ord()`` can raise here, and it
        raises TypeError on non-character input.
        """
        if not text:
            return ""
        try:
            return ''.join(chr(128 ^ ord(c)) for c in text)
        except TypeError:
            return text

    @staticmethod
    def from_js(js: str) -> str:
        """Extract the argument of document.write(l('...')) from *js* and
        decrypt it; return "" when the pattern is absent."""
        return VideoDecryptor.decrypt(m.group(1)) if (m := re.search(r"document\.write\(l\('([^']+)'\)\)", js)) else ""
|
|
|
|
|
|
class Spider(BaseSpile):
    """Spider for a video site whose list pages obfuscate titles with an
    XOR-128 scheme (see VideoDecryptor).

    Implements the standard spider interface: homeContent, homeVideoContent,
    categoryContent, searchContent, detailContent, playerContent.
    """

    # CSS selectors that identify a video card on home/list/search pages.
    CARD_SELECTORS = ".vodbox, .stui-vodlist__box, .vodlist__box, .video-card, .item"

    def init(self, extend=""):
        """Initialise hosts, request headers and the vid -> title cache."""
        self.host = "https://h4ivs.sm431.vip"
        self.video_host = "https://m3u8.nl:88"
        self.image_host = "https://3334.nl:33"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Linux; Android 13; 22127RK46C Build/TKQ1.220905.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/104.0.5112.97 Mobile Safari/537.36",
            "Referer": self.host,
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        # vid -> decrypted title, filled while parsing list pages.
        self.cache = {}

    def get(self, url):
        """GET *url* with the spider headers.

        Returns the UTF-8 decoded body, or "" on any network/HTTP error.
        (Narrowed from a bare ``except:`` to requests' exception base.)
        """
        try:
            r = requests.get(url, headers=self.headers, timeout=15)
            r.raise_for_status()
            r.encoding = "utf-8"
            return r.text
        except requests.RequestException:
            return ""

    def img_url(self, url):
        """Normalise an image URL and append the @User-Agent/@Referer hints
        the player app uses when fetching the picture."""
        if not url:
            return ""
        if url.startswith("//"):
            url = "https:" + url
        elif url.startswith("/"):
            url = self.image_host + url
        return f"{url}@User-Agent={self.headers['User-Agent']}@Referer={self.host}/"

    def parse(self, el):
        """Parse one video card element into a vod dict.

        Returns None when the card has no usable /vid/<id> link.
        """
        a = el if el.name == 'a' else el.find('a')
        if not a or not (href := a.get("href", "")):
            return None

        href = self.host + href if href.startswith("/") else href
        if not (vid := re.search(r"/vid/(\d+)", href)):
            return None

        vid = vid.group(1)
        title = ""

        # Decrypt the title from the inline <script> inside the card's <p>;
        # fall back to the <p>'s visible text.
        if p := el.find('p'):
            if s := p.find('script'):
                if s.string:
                    title = VideoDecryptor.from_js(s.string)
            title = title or p.get_text(strip=True)

        # Last resort: XOR-decrypt likely title attributes on the card itself.
        if not title:
            for attr in ['data-title', 'data-name', 'title']:
                if el.has_attr(attr) and (val := el[attr]):
                    if (de := VideoDecryptor.decrypt(val)) and len(de) > 3:
                        title = de
                        break

        title = title or "未知标题"
        if title != "未知标题":
            self.cache[vid] = title

        # Card image; fall back to the conventional <image_host>/<vid>.jpg.
        img = ""
        if node := el.select_one("img"):
            img = node.get("data-src") or node.get("src") or ""
        img = img or f"{self.image_host}/{vid}.jpg"

        return {
            "vod_id": vid,
            "vod_name": title,
            "vod_pic": self.img_url(img),
            "vod_remarks": "",
        }

    def get_title(self, vid):
        """Return the title for *vid* from the cache, or re-scan the home
        page for its card. Returns None when not found."""
        if vid in self.cache:
            return self.cache[vid]

        if html := self.get(self.host):
            soup = BeautifulSoup(html, "html.parser")
            for link in soup.select('a[href*="/vid/"]'):
                if f'/vid/{vid}' in link.get('href', ''):
                    if p := link.find('p'):
                        if s := p.find('script'):
                            if s.string and (t := VideoDecryptor.from_js(s.string)):
                                self.cache[vid] = t
                                return t
                        if t := p.get_text(strip=True):
                            self.cache[vid] = t
                            return t
        return None

    def _extract_videos(self, soup, html):
        """Collect vod dicts from all recognised cards in *soup*.

        When no card matches, fall back to scanning the raw *html* for
        markdown-style [](/vid/<id>.html) links. Shared by home, category
        and search parsing (was duplicated verbatim in all three).
        """
        videos = [v for v in (self.parse(el) for el in soup.select(self.CARD_SELECTORS)) if v]
        if not videos:
            videos = [{"vod_id": v, "vod_name": "未知标题", "vod_pic": self.img_url(f"{self.image_host}/{v}.jpg"), "vod_remarks": ""}
                      for v in re.findall(r'\[]\(/vid/(\d+)\.html\)', html)]
        return videos

    def homeContent(self, filter):
        """Static category list."""
        return {
            "class": [
                {"type_name": "国产", "type_id": "1"},
                {"type_name": "日本", "type_id": "2"},
                {"type_name": "韩国", "type_id": "3"},
                {"type_name": "欧美", "type_id": "4"},
                {"type_name": "三级", "type_id": "5"},
                {"type_name": "动漫", "type_id": "6"},
            ]
        }

    def homeVideoContent(self):
        """Videos shown on the home page."""
        if not (html := self.get(self.host)):
            return {"list": []}

        soup = BeautifulSoup(html, "html.parser")
        return {"list": self._extract_videos(soup, html)}

    def categoryContent(self, tid, pg, filter, extend):
        """One page of a category listing; tid "0" means the home feed."""
        if tid == "0":
            url = self.host if int(pg) == 1 else f"{self.host}/page/{pg}.html"
        else:
            url = f"{self.host}/list/{tid}.html" if int(pg) == 1 else f"{self.host}/list/{tid}/{pg}.html"

        if not (html := self.get(url)):
            return {"list": [], "page": pg, "pagecount": 1, "limit": 30, "total": 0}

        soup = BeautifulSoup(html, "html.parser")
        videos = self._extract_videos(soup, html)

        # Highest page number visible in the pagination links; current page if none.
        last = max([int(m.group(1)) for a in soup.select("a[href*='list/']") if (m := re.search(r"/list/\d+/(\d+)\.html", a.get("href", "")))], default=int(pg))

        return {"list": videos, "page": pg, "pagecount": max(last, 1), "limit": 30, "total": 99999}

    def searchContent(self, key, quick, pg="1"):
        """Search *key* against /so.html; some mirrors accept only GET,
        others only POST, so both are attempted in order."""
        url = f"{self.host}/so.html"
        params = {"wd": key}
        if int(pg) > 1:
            params["page"] = pg

        html = ""
        for method in [requests.get, requests.post]:
            try:
                r = method(url, params=params if method == requests.get else None,
                           data=params if method == requests.post else None,
                           headers=self.headers, timeout=15)
                r.raise_for_status()
                r.encoding = "utf-8"
                html = r.text
                break
            except requests.RequestException:  # narrowed from a bare except:
                continue

        if not html:
            return {"list": []}

        soup = BeautifulSoup(html, "html.parser")
        videos = self._extract_videos(soup, html)

        # Highest page number referenced by the search pagination links.
        last = max([int(m.group(1)) for a in soup.select("a[href*='so.html'], .pagination a, .page-link")
                    if (m := re.search(r"[?&]page=(\d+)", a.get("href", "")))], default=int(pg))

        return {"list": videos, "page": pg, "pagecount": max(last, 1), "limit": 30, "total": 99999}

    def detailContent(self, ids):
        """Detail page for the first id in *ids*."""
        vid = ids[0]
        if not (html := self.get(f"{self.host}/vid/{vid}.html")):
            return {"list": []}

        soup = BeautifulSoup(html, "html.parser")

        # Title: cache/home-page scan first, then the <title> tag with any
        # short trailing site suffix stripped, then common heading selectors.
        title = self.get_title(vid)
        if not title:
            if t := soup.find('title'):
                title = re.sub(r'\s*[-_|]\s*.{0,20}$', '', t.get_text(strip=True)).strip()

        if not title or len(title) < 5:
            for sel in ['h1', 'h2', '.video-title', '.title']:
                if (el := soup.select_one(sel)) and (txt := el.get_text(strip=True)) and len(txt) > 5:
                    title = txt
                    break

        title = title or f"视频{vid}"

        # Poster: first non-favicon hit among the usual containers, then
        # og:image, then the conventional <image_host>/<vid>.jpg.
        pic = ""
        for sel in ['.picbox img', '.vodimg img', '.video-pic img', '.poster img', 'img[data-id]']:
            if (node := soup.select_one(sel)) and (p := node.get("data-src") or node.get("src")) and 'favicon' not in p.lower():
                pic = p
                break

        if not pic or 'favicon' in pic.lower():
            if meta := soup.select_one('meta[property="og:image"]'):
                pic = meta.get('content', '')

        pic = pic or f"{self.image_host}/{vid}.jpg"

        # Synopsis from the first matching info container, if any.
        desc = soup.select_one(".vodinfo, .video-info, .content, .intro, .description")
        desc = desc.get_text(strip=True) if desc else ""

        return {"list": [{
            "vod_id": vid,
            "vod_name": title,
            "vod_pic": self.img_url(pic),
            "vod_content": desc,
            "vod_play_from": "七哥比较瑟",
            "vod_play_url": f"狗哥特别瑟${vid}@@0@@1",
        }]}

    def playerContent(self, flag, id, vipFlags):
        """Direct m3u8 playlist URL derived from the vid encoded in *id*."""
        vid = id.split("@@")[0]
        return {"parse": 0, "url": f"{self.video_host}/{vid}/hls/index.m3u8", "header": self.headers}

    def localProxy(self, param):
        """No local proxying."""
        return {"code": 404, "content": ""}

    def isVideoFormat(self, url):
        """True when *url* looks like an HLS playlist."""
        return ".m3u8" in url.lower()

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass