Ru/c/PY1/神秘影院.py

# -*- coding: utf-8 -*-
#恰逢
import re
import urllib.parse
from base.spider import Spider as BaseSpile
import requests
from bs4 import BeautifulSoup


class VideoDecryptor:
    """Decode strings that are obfuscated by XOR-ing each character with 128."""

    # Inline-script call that writes an obfuscated string; the capture group
    # is the single-quoted payload passed to ``l(...)``.
    _JS_PAYLOAD = re.compile(r"document\.write\(l\('([^']+)'\)\)")

    @staticmethod
    def decrypt(text: str) -> str:
        """Return *text* with every character's code point XOR-ed with 128.

        Falsy input yields ``""``. Input that cannot be decoded character by
        character (for example a list of multi-character strings) is returned
        unchanged, keeping the helper best-effort.
        """
        if not text:
            return ""
        try:
            return "".join(chr(128 ^ ord(ch)) for ch in text)
        except (TypeError, ValueError):
            # Only decoding errors are tolerated; interpreter-exit signals
            # such as KeyboardInterrupt are no longer swallowed.
            return text

    @staticmethod
    def from_js(js: str) -> str:
        """Extract and decode the payload of ``document.write(l('...'))``.

        Returns ``""`` when *js* contains no such call.
        """
        match = VideoDecryptor._JS_PAYLOAD.search(js)
        if match is None:
            return ""
        return VideoDecryptor.decrypt(match.group(1))


class Spider(BaseSpile):
    """Scraper plugin for a video site whose titles are XOR-obfuscated.

    Listing pages are parsed with BeautifulSoup; video IDs are taken from
    ``/vid/<id>`` links and titles are decoded from inline scripts via
    ``VideoDecryptor``. Decoded titles are cached per video ID in
    ``self.cache``.
    """

    # CSS selector covering every card layout the listing pages are parsed for.
    _CARD_SELECTOR = ".vodbox, .stui-vodlist__box, .vodlist__box, .video-card, .item"
    # Placeholder name used when no title could be recovered for a video.
    _UNKNOWN_TITLE = "未知标题"
    # Pulls the numeric video ID out of a link.
    _VID_RE = re.compile(r"/vid/(\d+)")
    # Fallback ID pattern applied to the raw page text when no card matched.
    # NOTE(review): this matches Markdown-style ``[](/vid/N.html)`` text, which
    # probably never occurs in raw HTML — confirm before relying on it.
    _FALLBACK_ID_RE = re.compile(r'\[]\(/vid/(\d+)\.html\)')

    def init(self, extend=""):
        """Set the hosts, default request headers and an empty title cache.

        ``extend`` is accepted but not used.
        """
        self.host = "https://h4ivs.sm431.vip"
        self.video_host = "https://m3u8.nl:88"
        self.image_host = "https://3334.nl:33"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Linux; Android 13; 22127RK46C Build/TKQ1.220905.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/104.0.5112.97 Mobile Safari/537.36",
            "Referer": self.host,
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        self.cache = {}

    def get(self, url):
        """GET *url* and return the body decoded as UTF-8.

        Returns ``""`` when the request fails or the status is an HTTP error.
        """
        try:
            resp = requests.get(url, headers=self.headers, timeout=15)
            resp.raise_for_status()
        except requests.RequestException:
            # Best-effort fetch: callers treat "" as "page unavailable".
            return ""
        resp.encoding = "utf-8"
        return resp.text

    def img_url(self, url):
        """Return *url* as an absolute image URL with header hints appended.

        Protocol-relative URLs get ``https:``, root-relative ones get the
        image host. The ``@User-Agent=...@Referer=...`` suffix is appended
        verbatim. Empty input yields ``""``.
        """
        if not url:
            return ""
        if url.startswith("//"):
            url = "https:" + url
        elif url.startswith("/"):
            url = self.image_host + url
        return f"{url}@User-Agent={self.headers['User-Agent']}@Referer={self.host}/"

    @staticmethod
    def _paragraph_title(p):
        """Return the title held by a ``<p>`` node: decoded script, else its text."""
        script = p.find("script")
        decoded = ""
        if script is not None and script.string:
            decoded = VideoDecryptor.from_js(script.string)
        return decoded or p.get_text(strip=True)

    def _placeholder(self, vid):
        """Build a listing entry for a video known only by its ID."""
        return {
            "vod_id": vid,
            "vod_name": self._UNKNOWN_TITLE,
            "vod_pic": self.img_url(f"{self.image_host}/{vid}.jpg"),
            "vod_remarks": "",
        }

    def _videos_from(self, html, soup):
        """Parse every card in *soup*; fall back to IDs found in raw *html*."""
        parsed = (self.parse(card) for card in soup.select(self._CARD_SELECTOR))
        videos = [video for video in parsed if video]
        if not videos:
            videos = [self._placeholder(vid) for vid in self._FALLBACK_ID_RE.findall(html)]
        return videos

    @staticmethod
    def _last_page(soup, selector, pattern, pg):
        """Return the highest page number linked from *soup*, at least 1.

        Falls back to ``int(pg)`` when no pagination link matches *pattern*.
        """
        pages = [
            int(m.group(1))
            for node in soup.select(selector)
            if (m := re.search(pattern, node.get("href", "")))
        ]
        return max(max(pages, default=int(pg)), 1)

    def parse(self, el):
        """Turn one listing card into a video entry.

        Returns ``None`` when the card has no link or the link carries no
        ``/vid/<id>``. A recovered title is stored in ``self.cache``.
        """
        anchor = el if el.name == 'a' else el.find('a')
        if not anchor:
            return None
        href = anchor.get("href", "")
        if not href:
            return None
        if href.startswith("/"):
            href = self.host + href

        id_match = self._VID_RE.search(href)
        if not id_match:
            return None
        vid = id_match.group(1)

        # Preferred source: the obfuscated script (or plain text) inside <p>.
        title = ""
        if p := el.find('p'):
            title = self._paragraph_title(p)

        # Otherwise try obfuscated attributes; very short results are ignored.
        if not title:
            for attr in ['data-title', 'data-name', 'title']:
                if el.has_attr(attr) and (val := el[attr]):
                    if (de := VideoDecryptor.decrypt(val)) and len(de) > 3:
                        title = de
                        break

        if title:
            self.cache[vid] = title
        else:
            title = self._UNKNOWN_TITLE

        # Lazy-loaded source first, then src, then a guessed poster URL.
        img = ""
        if node := el.select_one("img"):
            img = node.get("data-src") or node.get("src") or ""
        img = img or f"{self.image_host}/{vid}.jpg"

        return {
            "vod_id": vid,
            "vod_name": title,
            "vod_pic": self.img_url(img),
            "vod_remarks": "",
        }

    def get_title(self, vid):
        """Return the title for *vid* from the cache or the home page.

        On a cache miss the home page is fetched and searched for a link to
        exactly this video ID. Returns ``None`` when no title is found.
        """
        if vid in self.cache:
            return self.cache[vid]

        html = self.get(self.host)
        if not html:
            return None

        # The ID must not be followed by another digit, so that "12" does not
        # match a link to "/vid/123.html".
        wanted = re.compile(rf"/vid/{re.escape(str(vid))}(?!\d)")
        soup = BeautifulSoup(html, "html.parser")
        for link in soup.select('a[href*="/vid/"]'):
            if not wanted.search(link.get('href', '')):
                continue
            p = link.find('p')
            if not p:
                continue
            if title := self._paragraph_title(p):
                self.cache[vid] = title
                return title
        return None

    def homeContent(self, filter):
        """Return the fixed category list; ``filter`` is not used."""
        return {
            "class": [
                {"type_name": "国产", "type_id": "1"},
                {"type_name": "日本", "type_id": "2"},
                {"type_name": "韩国", "type_id": "3"},
                {"type_name": "欧美", "type_id": "4"},
                {"type_name": "三级", "type_id": "5"},
                {"type_name": "动漫", "type_id": "6"},
            ]
        }

    def homeVideoContent(self):
        """Return the videos listed on the home page (empty list on failure)."""
        html = self.get(self.host)
        if not html:
            return {"list": []}
        soup = BeautifulSoup(html, "html.parser")
        return {"list": self._videos_from(html, soup)}

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of a category listing.

        ``tid == "0"`` addresses the site-wide listing; any other value is a
        category ID. ``filter`` and ``extend`` are not used.
        """
        first_page = int(pg) == 1
        if tid == "0":
            url = self.host if first_page else f"{self.host}/page/{pg}.html"
        elif first_page:
            url = f"{self.host}/list/{tid}.html"
        else:
            url = f"{self.host}/list/{tid}/{pg}.html"

        html = self.get(url)
        if not html:
            return {"list": [], "page": pg, "pagecount": 1, "limit": 30, "total": 0}

        soup = BeautifulSoup(html, "html.parser")
        videos = self._videos_from(html, soup)
        pagecount = self._last_page(soup, "a[href*='list/']", r"/list/\d+/(\d+)\.html", pg)
        return {"list": videos, "page": pg, "pagecount": pagecount, "limit": 30, "total": 99999}

    def searchContent(self, key, quick, pg="1"):
        """Search for *key* and return one page of results.

        The query is sent as a GET first and retried as a POST if that fails.
        ``quick`` is not used.
        """
        url = f"{self.host}/so.html"
        params = {"wd": key}
        if int(pg) > 1:
            params["page"] = pg

        attempts = (
            (requests.get, {"params": params}),
            (requests.post, {"data": params}),
        )
        html = ""
        for send, payload in attempts:
            try:
                resp = send(url, headers=self.headers, timeout=15, **payload)
                resp.raise_for_status()
            except requests.RequestException:
                continue
            resp.encoding = "utf-8"
            html = resp.text
            break

        if not html:
            return {"list": []}

        soup = BeautifulSoup(html, "html.parser")
        videos = self._videos_from(html, soup)
        pagecount = self._last_page(
            soup, "a[href*='so.html'], .pagination a, .page-link", r"[?&]page=(\d+)", pg
        )
        return {"list": videos, "page": pg, "pagecount": pagecount, "limit": 30, "total": 99999}

    def detailContent(self, ids):
        """Return the detail entry for the first ID in *ids*.

        Yields ``{"list": []}`` when *ids* is empty or the detail page cannot
        be fetched.
        """
        if not ids:
            return {"list": []}
        vid = ids[0]
        html = self.get(f"{self.host}/vid/{vid}.html")
        if not html:
            return {"list": []}

        soup = BeautifulSoup(html, "html.parser")

        # Title: cache / home page first, then <title> minus a short trailing
        # suffix, then the first sufficiently long heading.
        title = self.get_title(vid)
        if not title:
            if t := soup.find('title'):
                title = re.sub(r'\s*[-_|]\s*.{0,20}$', '', t.get_text(strip=True)).strip()

            if not title or len(title) < 5:
                for sel in ['h1', 'h2', '.video-title', '.title']:
                    if (el := soup.select_one(sel)) and (txt := el.get_text(strip=True)) and len(txt) > 5:
                        title = txt
                        break

        title = title or f"视频{vid}"

        # Poster: first non-favicon image among the known containers, then
        # og:image, then a guessed URL on the image host.
        pic = ""
        for sel in ['.picbox img', '.vodimg img', '.video-pic img', '.poster img', 'img[data-id]']:
            if (node := soup.select_one(sel)) and (p := node.get("data-src") or node.get("src")) and 'favicon' not in p.lower():
                pic = p
                break

        if not pic or 'favicon' in pic.lower():
            if meta := soup.select_one('meta[property="og:image"]'):
                pic = meta.get('content', '')

        pic = pic or f"{self.image_host}/{vid}.jpg"

        # Description: text of the first matching info container, if any.
        desc_node = soup.select_one(".vodinfo, .video-info, .content, .intro, .description")
        desc = desc_node.get_text(strip=True) if desc_node else ""

        return {"list": [{
            "vod_id": vid,
            "vod_name": title,
            "vod_pic": self.img_url(pic),
            "vod_content": desc,
            "vod_play_from": "七哥比较瑟",
            "vod_play_url": f"狗哥特别瑟${vid}@@0@@1",
        }]}

    def playerContent(self, flag, id, vipFlags):
        """Return the HLS playlist URL for the video ID before the first ``@@``.

        ``flag`` and ``vipFlags`` are not used.
        """
        vid = id.split("@@")[0]
        return {"parse": 0, "url": f"{self.video_host}/{vid}/hls/index.m3u8", "header": self.headers}

    def localProxy(self, param):
        """Return a fixed 404 response; ``param`` is not used."""
        return {"code": 404, "content": ""}

    def isVideoFormat(self, url):
        """Return True when *url* contains ``.m3u8`` (case-insensitive)."""
        return ".m3u8" in url.lower()

    def manualVideoCheck(self):
        """No-op."""
        pass

    def destroy(self):
        """No-op."""
        pass