# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# Ru/c/PY1/神秘影院.py

# 261 lines
# 9.5 KiB

# -*- coding: utf-8 -*-
#恰逢
import re
import urllib.parse
from base.spider import Spider as BaseSpile
import requests
from bs4 import BeautifulSoup
class VideoDecryptor:
    """Decode strings obfuscated by XOR-ing every character's code point with 128."""

    # Matches the inline-script call ``document.write(l('<payload>'))`` and
    # captures the single-quoted payload.
    _WRITE_CALL = re.compile(r"document\.write\(l\('([^']+)'\)\)")

    @staticmethod
    def decrypt(text: str) -> str:
        """Return *text* with each character's code point XOR-ed with 128.

        The transform is its own inverse, so applying it twice yields the
        original string. Empty or ``None`` input gives ``""``. Input that
        cannot be processed character by character (e.g. a list of
        multi-character strings) is returned unchanged.
        """
        if not text:
            return ""
        try:
            return "".join(chr(128 ^ ord(ch)) for ch in text)
        except (TypeError, ValueError):
            # Best effort: hand back whatever was passed in rather than fail.
            return text

    @staticmethod
    def from_js(js: str) -> str:
        """Extract and decode the payload of a ``document.write(l('...'))`` call.

        Returns ``""`` when *js* contains no such call.
        """
        found = VideoDecryptor._WRITE_CALL.search(js)
        return VideoDecryptor.decrypt(found.group(1)) if found else ""
class Spider(BaseSpile):
    """Site spider that builds category, search, detail and playback payloads from scraped HTML.

    Instance state (hosts, request headers and the title cache) is created in
    ``init`` rather than ``__init__``; every other method reads that state, so
    ``init`` has to run first.
    """

    # CSS selector covering every video-card container layout the spider knows.
    _CARD_SELECTOR = ".vodbox, .stui-vodlist__box, .vodlist__box, .video-card, .item"
    # Placeholder name used when no title could be extracted for a video.
    _UNKNOWN_TITLE = "未知标题"
    # Fallback pattern used when no card matched the selector.
    # NOTE(review): this matches a literal Markdown-style "[](/vid/N.html)"
    # link, which looks unusual for raw HTML -- confirm against the site markup.
    _FALLBACK_VID_RE = re.compile(r'\[]\(/vid/(\d+)\.html\)')

    def init(self, extend=""):
        """Set up hosts, default request headers and an empty title cache.

        ``extend`` is accepted for interface compatibility and is not used.
        """
        self.host = "https://h4ivs.sm431.vip"
        self.video_host = "https://m3u8.nl:88"
        self.image_host = "https://3334.nl:33"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Linux; Android 13; 22127RK46C Build/TKQ1.220905.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/104.0.5112.97 Mobile Safari/537.36",
            "Referer": self.host,
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        # Maps video id (str) -> title, filled while parsing listing cards.
        self.cache = {}

    def get(self, url):
        """Fetch *url* with the spider's headers and return the body decoded as UTF-8.

        Returns ``""`` when the request fails or the server answers with an
        HTTP error status.
        """
        try:
            resp = requests.get(url, headers=self.headers, timeout=15)
            resp.raise_for_status()
        except requests.RequestException:
            return ""
        resp.encoding = "utf-8"
        return resp.text

    def img_url(self, url):
        """Normalise an image URL and append the request-header suffix.

        Protocol-relative URLs get ``https:``; site-relative paths are
        prefixed with the image host. The ``@User-Agent=...@Referer=...``
        suffix is presumably interpreted by the consuming player -- verify
        against the host application.
        """
        if not url:
            return ""
        if url.startswith("//"):
            url = "https:" + url
        elif url.startswith("/"):
            url = self.image_host + url
        return f"{url}@User-Agent={self.headers['User-Agent']}@Referer={self.host}/"

    @staticmethod
    def _paragraph_title(node):
        """Return the title held in *node*'s first ``<p>``, or ``""``.

        A decoded inline-script title wins; otherwise the paragraph's text
        is used.
        """
        para = node.find("p")
        if para is None:
            return ""
        script = para.find("script")
        if script is not None and script.string:
            if decoded := VideoDecryptor.from_js(script.string):
                return decoded
        return para.get_text(strip=True)

    def _placeholder(self, vid):
        """Build a listing entry for a video id whose card could not be parsed."""
        return {
            "vod_id": vid,
            "vod_name": self._UNKNOWN_TITLE,
            "vod_pic": self.img_url(f"{self.image_host}/{vid}.jpg"),
            "vod_remarks": "",
        }

    def _parse_listing(self, html):
        """Parse a listing page; return ``(soup, videos)``.

        Cards are located via ``_CARD_SELECTOR``; when none yields a video the
        raw HTML is scanned with ``_FALLBACK_VID_RE`` instead.
        """
        soup = BeautifulSoup(html, "html.parser")
        videos = [v for v in map(self.parse, soup.select(self._CARD_SELECTOR)) if v]
        if not videos:
            videos = [self._placeholder(v) for v in self._FALLBACK_VID_RE.findall(html)]
        return soup, videos

    @staticmethod
    def _last_page(soup, selector, pattern, current):
        """Return the highest page number linked from *soup* (at least 1).

        Falls back to *current* when no link selected by *selector* has an
        ``href`` matching *pattern*.
        """
        pages = [
            int(m.group(1))
            for a in soup.select(selector)
            if (m := re.search(pattern, a.get("href", "")))
        ]
        return max(max(pages, default=current), 1)

    def parse(self, el):
        """Turn one card element into a listing dict, or ``None`` if it has no video link.

        Side effect: a successfully extracted title is stored in ``self.cache``
        under the video id.
        """
        anchor = el if el.name == "a" else el.find("a")
        if anchor is None:
            return None
        href = anchor.get("href", "")
        if not href:
            return None
        if href.startswith("/"):
            href = self.host + href
        id_match = re.search(r"/vid/(\d+)", href)
        if not id_match:
            return None
        vid = id_match.group(1)

        # Title: first the (possibly obfuscated) <p>, then obfuscated attributes.
        title = self._paragraph_title(el)
        if not title:
            for attr in ("data-title", "data-name", "title"):
                if el.has_attr(attr) and (raw := el[attr]):
                    decoded = VideoDecryptor.decrypt(raw)
                    # Very short results are treated as not being a real title.
                    if decoded and len(decoded) > 3:
                        title = decoded
                        break
        title = title or self._UNKNOWN_TITLE
        if title != self._UNKNOWN_TITLE:
            self.cache[vid] = title

        # Picture: lazy-load attribute first, then src, then a guessed URL.
        img = ""
        if (node := el.select_one("img")) is not None:
            img = node.get("data-src") or node.get("src") or ""
        img = img or f"{self.image_host}/{vid}.jpg"

        return {
            "vod_id": vid,
            "vod_name": title,
            "vod_pic": self.img_url(img),
            "vod_remarks": "",
        }

    def get_title(self, vid):
        """Return the title for *vid* from the cache or, failing that, the home page.

        Returns ``None`` when the title cannot be found. Links are matched on
        the exact numeric id so that e.g. id ``12`` does not pick up
        ``/vid/123.html``.
        """
        if vid in self.cache:
            return self.cache[vid]
        html = self.get(self.host)
        if not html:
            return None
        wanted = str(vid)
        soup = BeautifulSoup(html, "html.parser")
        for link in soup.select('a[href*="/vid/"]'):
            id_match = re.search(r"/vid/(\d+)", link.get("href", ""))
            if not id_match or id_match.group(1) != wanted:
                continue
            if title := self._paragraph_title(link):
                self.cache[vid] = title
                return title
        return None

    def homeContent(self, filter):
        """Return the fixed list of categories; ``filter`` is not used."""
        return {
            "class": [
                {"type_name": "国产", "type_id": "1"},
                {"type_name": "日本", "type_id": "2"},
                {"type_name": "韩国", "type_id": "3"},
                {"type_name": "欧美", "type_id": "4"},
                {"type_name": "三级", "type_id": "5"},
                {"type_name": "动漫", "type_id": "6"},
            ]
        }

    def homeVideoContent(self):
        """Return the videos listed on the site's home page."""
        html = self.get(self.host)
        if not html:
            return {"list": []}
        _, videos = self._parse_listing(html)
        return {"list": videos}

    def categoryContent(self, tid, pg, filter, extend):
        """Return page *pg* of category *tid*.

        ``tid == "0"`` addresses the home listing. ``filter`` and ``extend``
        are not used. ``pagecount`` is the highest page number linked from
        the page, or *pg* when no pagination links are found.
        """
        page = int(pg)
        if tid == "0":
            url = self.host if page == 1 else f"{self.host}/page/{pg}.html"
        elif page == 1:
            url = f"{self.host}/list/{tid}.html"
        else:
            url = f"{self.host}/list/{tid}/{pg}.html"

        html = self.get(url)
        if not html:
            return {"list": [], "page": pg, "pagecount": 1, "limit": 30, "total": 0}

        soup, videos = self._parse_listing(html)
        last = self._last_page(soup, "a[href*='list/']", r"/list/\d+/(\d+)\.html", page)
        return {"list": videos, "page": pg, "pagecount": last, "limit": 30, "total": 99999}

    def searchContent(self, key, quick, pg="1"):
        """Search the site for *key* and return page *pg* of the results.

        ``quick`` is not used. Returns ``{"list": []}`` when neither request
        attempt produces a body.
        """
        page = int(pg)
        url = f"{self.host}/so.html"
        params = {"wd": key}
        if page > 1:
            params["page"] = pg

        html = ""
        # Try GET with a query string first, then a form POST; the first
        # attempt that completes without an error status wins.
        attempts = (
            (requests.get, {"params": params}),
            (requests.post, {"data": params}),
        )
        for send, payload in attempts:
            try:
                resp = send(url, headers=self.headers, timeout=15, **payload)
                resp.raise_for_status()
            except requests.RequestException:
                continue
            resp.encoding = "utf-8"
            html = resp.text
            break
        if not html:
            return {"list": []}

        soup, videos = self._parse_listing(html)
        last = self._last_page(
            soup,
            "a[href*='so.html'], .pagination a, .page-link",
            r"[?&]page=(\d+)",
            page,
        )
        return {"list": videos, "page": pg, "pagecount": last, "limit": 30, "total": 99999}

    def detailContent(self, ids):
        """Return the detail payload for the first id in *ids*.

        Returns ``{"list": []}`` when *ids* is empty or the detail page
        cannot be fetched.
        """
        if not ids:
            return {"list": []}
        vid = ids[0]
        html = self.get(f"{self.host}/vid/{vid}.html")
        if not html:
            return {"list": []}
        soup = BeautifulSoup(html, "html.parser")

        # Title: cache / home page first, then <title> with a trailing
        # "- site name" style suffix removed, then common heading selectors.
        title = self.get_title(vid)
        if not title:
            if (title_tag := soup.find("title")) is not None:
                title = re.sub(r'\s*[-_|]\s*.{0,20}$', '', title_tag.get_text(strip=True)).strip()
            if not title or len(title) < 5:
                for sel in ("h1", "h2", ".video-title", ".title"):
                    node = soup.select_one(sel)
                    if node is None:
                        continue
                    text = node.get_text(strip=True)
                    if text and len(text) > 5:
                        title = text
                        break
        title = title or f"视频{vid}"

        # Picture: known poster containers, skipping favicons, then og:image,
        # then a guessed URL on the image host.
        pic = ""
        for sel in (".picbox img", ".vodimg img", ".video-pic img", ".poster img", "img[data-id]"):
            node = soup.select_one(sel)
            if node is None:
                continue
            candidate = node.get("data-src") or node.get("src")
            if candidate and "favicon" not in candidate.lower():
                pic = candidate
                break
        if not pic or "favicon" in pic.lower():
            if (meta := soup.select_one('meta[property="og:image"]')) is not None:
                pic = meta.get("content", "")
        pic = pic or f"{self.image_host}/{vid}.jpg"

        # Description
        desc_node = soup.select_one(".vodinfo, .video-info, .content, .intro, .description")
        desc = desc_node.get_text(strip=True) if desc_node is not None else ""

        return {"list": [{
            "vod_id": vid,
            "vod_name": title,
            "vod_pic": self.img_url(pic),
            "vod_content": desc,
            "vod_play_from": "七哥比较瑟",
            "vod_play_url": f"狗哥特别瑟${vid}@@0@@1",
        }]}

    def playerContent(self, flag, id, vipFlags):
        """Return the direct HLS playlist URL for a play id.

        Only the part of *id* before the first ``@@`` (the video id) is used;
        ``flag`` and ``vipFlags`` are not used.
        """
        vid = id.split("@@")[0]
        return {"parse": 0, "url": f"{self.video_host}/{vid}/hls/index.m3u8", "header": self.headers}

    def localProxy(self, param):
        """Local proxying is not supported; always answer with a 404 payload."""
        return {"code": 404, "content": ""}

    def isVideoFormat(self, url):
        """Return True when *url* contains ``.m3u8`` (case-insensitive)."""
        return ".m3u8" in url.lower()

    def manualVideoCheck(self):
        """No-op."""
        pass

    def destroy(self):
        """No-op; nothing is cleaned up."""
        pass