You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Ru/c/PY1/tube8.py

223 lines
9.6 KiB

# coding=utf-8
# !/usr/bin/python
import re
import sys
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
    """Scraper for tube8.com built on the project's ``base.spider.Spider``.

    The class reuses the name of the base class it extends, so after this
    statement ``Spider`` refers to this subclass.

    Folder entries (categories, channels, pornstars) are encoded as
    ``vod_id = "folder_" + href`` by ``_make_folder`` and decoded again in
    ``categoryContent``.
    """

    # Seconds to wait for any HTTP response. ``requests`` applies no timeout
    # by default, so without this a stalled connection would block forever.
    REQUEST_TIMEOUT = 15

    # Prefix that marks a ``vod_id`` as a browsable folder.
    FOLDER_PREFIX = 'folder_'

    # Markers in the detail page HTML that indicate an unavailable video.
    REMOVED_MARKERS = ("has been flagged", "Disabled Video", "Removed Video", "Inactive Video")

    def init(self, extend=""):
        """Set up the host, request headers, HTTP session and category names.

        Args:
            extend: Accepted for interface compatibility; not used here.
        """
        self.host = "https://www.tube8.com"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Linux; Android 10; SM-G960F Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/133.0.6943.98 Mobile Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Referer': self.host + '/',
        }
        self.session = Session()
        self.session.headers.update(self.headers)
        # Maps the last path segment of a category URL to its display name.
        self.cat_map = {
            'amateur': '业余自拍', 'anal': '肛交', 'asian': '亚裔', 'bigdick': '大屌',
            'blowjob': '口交', 'creampie': '内射', 'ebony': '黑人', 'fetish': '恋物',
            'gay': '同性', 'hardcore': '硬核', 'hentai': '动漫', 'latina': '拉丁',
            'lesbian': '女同', 'mature': '熟女', 'milf': '人妻', 'public': '公共场所',
            'teens': '少女(18+)', 'threesome': '3P/多P', 'vr-porn': 'VR视频',
            'big-tits': '巨乳', 'blonde': '金发', 'facial': '颜射', 'gangbang': '群交',
            'handjob': '手淫', 'massage': '按摩', 'masturbation': '自慰', 'pov': '第一人称',
            'roleplay': '角色扮演', 'squirt': '潮吹', 'stockings': '丝袜', 'student': '学生',
            'babe': '美女', 'bbw': '肥满', 'brunette': '黑发', 'bukkake': '颜射派对',
            'cartoon': '卡通', 'casting': '试镜', 'compilation': '合集', 'cosplay': 'Cosplay',
            'couple': '情侣', 'cumshot': '射精', 'double-penetration': '双洞',
            'euro': '欧洲', 'funny': '搞笑', 'group-sex': '群交', 'homemade': '自制',
            'indian': '印度', 'interracial': '跨种族', 'japanese': '日本', 'outdoor': '户外',
            'reality': '真人秀', 'redhead': '红发', 'school': '学校', 'step-fantasy': '继亲幻想',
            'striptease': '脱衣舞', 'toys': '玩具', 'vintage': '经典', 'webcam': '网络摄像头',
            'china': '中国', 'korean': '韩国', 'thai': '泰国', 'filipina': '菲律宾',
            'vietnamese': '越南', 'taiwanese': '台湾', 'hong-kong': '香港',
            'russian': '俄罗斯', 'german': '德国', 'french': '法国', 'british': '英国',
            'american': '美国', 'italian': '意大利', 'brazilian': '巴西', 'czech': '捷克'
        }

    def homeContent(self, filter):
        """Return the top-level classes and their filter definitions.

        Args:
            filter: Accepted for interface compatibility; not used here.

        Returns:
            A dict with ``class`` (list of ``type_name``/``type_id`` entries)
            and ``filters`` (one sort filter list per ``type_id``).
        """
        classes = [
            {'type_name': '最新视频', 'type_id': '/newest'},
            {'type_name': '类别大全', 'type_id': '/categories'},
            {'type_name': '全部频道', 'type_id': '/channels'},
            {'type_name': '全部明星', 'type_id': '/pornstars'}
        ]
        sort_filter = {'key': 'sort', 'name': '排序', 'value': [
            {'n': '最新', 'v': ''},
            {'n': '最热', 'v': '/most-viewed'},
            {'n': '评分', 'v': '/best'},
            {'n': '最长', 'v': '/longest'}
        ]}
        filter_config = {item['type_id']: [sort_filter] for item in classes}
        return {'class': classes, 'filters': filter_config}

    def homeVideoContent(self):
        """Return the first page of the ``/newest`` listing."""
        return self.categoryContent('/newest', 1, None, None)

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of folders or videos for a class or folder id.

        Args:
            tid: One of ``/categories``, ``/channels``, ``/pornstars``, or a
                listing path/URL, optionally prefixed with ``folder_``.
            pg: Page number (int or str); ``1`` selects the un-suffixed URL.
            filter: Accepted for interface compatibility; not used here.
            extend: Optional dict of filter values; only ``sort`` is read.

        Returns:
            A dict with ``page``, ``pagecount``, ``limit``, ``total`` and
            ``list`` keys.
        """
        extend = extend or {}
        first_page = str(pg) == "1"
        if tid == '/categories':
            # The categories index is a single page; returning it again for
            # later pages would make clients append duplicates indefinitely.
            vdata = self._category_folders() if first_page else []
        elif tid == '/channels':
            url = "/channels" if first_page else f"/channels/page/{pg}/"
            vdata = self._entity_folders(url, '/channel/', ('.channel-name', '.title'), 1.0)
        elif tid == '/pornstars':
            url = "/pornstars" if first_page else f"/pornstars/page/{pg}/"
            vdata = self._entity_folders(url, '/pornstar/', ('.pornstar-name',), 0.75)
        else:
            url = self._listing_url(tid, pg, extend.get('sort', ''))
            vdata = self.getlist(self.getpq(url))
        return {'page': pg, 'pagecount': 9999, 'limit': 20, 'total': 999999, 'list': vdata}

    def _category_folders(self):
        """Build folder entries from the links on ``/categories.html``."""
        folders = []
        for anchor in self.getpq('/categories.html')('a[href*="/cat/"]').items():
            href = anchor.attr('href')
            if not href or 'porn-video' in href:
                continue
            cat_key = href.strip('/').split('/')[-1]
            raw_name = anchor.text().replace('Category', '').strip()
            name = self.cat_map.get(cat_key, raw_name)
            folders.append(self._make_folder(href, name, '', 1.33))
        return folders

    def _entity_folders(self, url, href_marker, name_selectors, ratio):
        """Build folder entries for links whose href contains ``href_marker``.

        The name is taken from the first of ``name_selectors`` that yields
        text, falling back to the anchor's own text.
        """
        folders = []
        for anchor in self.getpq(url)(f'a[href*="{href_marker}"]').items():
            href = anchor.attr('href')
            if not href or 'porn-video' in href:
                continue
            name = ''
            for selector in name_selectors:
                name = anchor.find(selector).text()
                if name:
                    break
            name = name or anchor.text()
            img = anchor.find('img')
            pic = img.attr('src') or img.attr('data-src') or ''
            folders.append(self._make_folder(href, name, pic, ratio))
        return folders

    def _listing_url(self, tid, pg, sort_suffix):
        """Translate a listing id, page and sort suffix into a URL or path."""
        # Strip the folder marker only where _make_folder puts it (the front),
        # so a path that merely contains the text is left intact.
        if tid.startswith(self.FOLDER_PREFIX):
            tid = tid[len(self.FOLDER_PREFIX):]
        path = tid.rstrip('/')
        if sort_suffix and sort_suffix not in path:
            path += sort_suffix
        if str(pg) == "1":
            return path
        absolute = self._fix_url(path)
        # Pornstar/channel pages paginate with a query parameter; every other
        # listing uses a /page/N/ path segment.
        if '/pornstar/' in absolute or '/channel/' in absolute:
            return f"{path}/?page={pg}"
        return f"{path}/page/{pg}/"

    def detailContent(self, ids):
        """Fetch a video page and return its title, cover and play URLs.

        Args:
            ids: Sequence whose first element is the video page URL.

        Returns:
            ``{'list': [...]}`` with zero or one entry. ``vod_play_url`` is a
            ``#``-joined list of ``label$url`` items ordered from highest to
            lowest quality.
        """
        if not ids:
            return {'list': []}
        page_url = ids[0]
        try:
            response = self.session.get(page_url, timeout=self.REQUEST_TIMEOUT)
            data = response.text
        except Exception:
            return {'list': []}
        if response.status_code == 404 or any(marker in data for marker in self.REMOVED_MARKERS):
            return {'list': [{'vod_id': page_url, 'vod_name': '视频已删除', 'vod_play_url': ''}]}
        d = pq(data)
        title = d('meta[property="og:title"]').attr('content') or "Unknown"
        cover = d('meta[property="og:image"]').attr('content') or ''
        try:
            video_urls = self._extract_play_urls(data)
        except Exception as e:
            video_urls = [f"解析异常${str(e)}"]
        return {'list': [{
            'vod_id': page_url,
            'vod_name': title,
            'vod_pic': cover,
            'vod_play_from': 'Tube8',
            'vod_play_url': '#'.join(video_urls),
        }]}

    def _extract_play_urls(self, html):
        """Return ``label$url`` play items found via the page's mp4 media entry.

        The page embeds a URL that returns a JSON list of quality variants;
        that list is fetched and sorted best-first. May raise on network or
        JSON errors; ``detailContent`` converts those into an error item.
        """
        match = re.search(r'"format":"mp4",.*?"videoUrl":"([^"]+)"', html)
        if not match:
            return ["解析失败$ERROR"]
        json_url = match.group(1).replace("\\/", "/")
        payload = self.session.get(json_url, timeout=self.REQUEST_TIMEOUT).json()
        variants = []
        for elem in payload if isinstance(payload, list) else []:
            if not isinstance(elem, dict) or not elem.get('videoUrl'):
                continue
            label, rank = self._parse_quality(elem.get('quality'))
            variants.append((rank, label, elem['videoUrl']))
        # sorted() is stable, so equal ranks keep their order from the payload.
        variants.sort(key=lambda item: item[0], reverse=True)
        # Report an explicit failure item instead of an empty play list.
        return [f"{label}${url}" for _, label, url in variants] or ["解析失败$ERROR"]

    @staticmethod
    def _parse_quality(raw):
        """Return ``(label, rank)`` for a raw quality value.

        Purely numeric values get a ``P`` suffix (``720`` -> ``720P``); other
        labels such as ``4K`` or ``SD`` are kept as-is. ``rank`` is the numeric
        height used for sorting (``4K`` counts as 2160, unknown as 0).
        """
        label = str('SD' if raw is None else raw).upper()
        if label.isdigit():
            label += 'P'
        if '4K' in label:
            return label, 2160
        digits = re.search(r'\d{3,4}', label)
        return label, int(digits.group()) if digits else 0

    def searchContent(self, key, quick, pg="1"):
        """Return the videos matching a search keyword.

        Args:
            key: Search keyword; percent-encoded before being sent.
            quick: Accepted for interface compatibility; not used here.
            pg: Page number of the result listing.
        """
        from urllib.parse import quote

        # Encode the keyword so characters such as '&' or '#' cannot
        # terminate or split the query string.
        url = f"/searches.html?q={quote(str(key), safe='')}&page={pg}"
        return {'list': self.getlist(self.getpq(url)), 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        """Return the direct play descriptor for a media URL.

        ``id`` is passed through unchanged as the URL, together with the
        headers configured in ``init``; ``parse`` is 0.
        """
        return {'parse': 0, 'url': id, 'header': self.headers}

    def getlist(self, data):
        """Extract video entries from a parsed listing page.

        Args:
            data: PyQuery document of a listing page.

        Returns:
            A list of dicts with ``vod_id``, ``vod_name``, ``vod_pic``,
            ``vod_remarks`` and ``style`` keys.
        """
        vlist = []
        for box in data('.video-box, .videoblock').items():
            link = box('a.tmvideolink') or box('a')
            href = link.attr('href')
            # Only links that point at a video page are kept.
            if not href or 'porn-video' not in href:
                continue
            title = box('.video-title-text').text() or link.attr('title') or box('img').attr('alt')
            pic = box('img.thumb-image').attr('data-src') or box('img').attr('src')
            vlist.append({
                'vod_id': self._fix_url(href),
                'vod_name': title or '',
                'vod_pic': pic or '',
                'vod_remarks': box('.video-duration').text(),
                'style': {'ratio': 1.77, 'type': 'rect'}
            })
        return vlist

    def getpq(self, url):
        """Fetch ``url`` and return it parsed as a PyQuery document.

        Any failure (network error, timeout, unparsable body) yields an empty
        ``<html>`` document so callers simply find no elements.
        """
        try:
            response = self.session.get(self._fix_url(url), timeout=self.REQUEST_TIMEOUT)
            return pq(response.text)
        except Exception:
            return pq("<html></html>")

    def _fix_url(self, url):
        """Return ``url`` as an absolute URL on ``self.host``.

        Absolute ``http(s)`` URLs are returned unchanged, protocol-relative
        ``//host/path`` URLs get an ``https:`` scheme, and anything else is
        treated as a path on the site.
        """
        url = url or ''
        if url.startswith(('http://', 'https://')):
            return url
        if url.startswith('//'):
            return 'https:' + url
        separator = '' if url.startswith('/') else '/'
        return self.host + separator + url

    def _make_folder(self, href, name, pic, ratio):
        """Build a folder entry whose ``vod_id`` is ``folder_`` + ``href``."""
        return {
            'vod_id': f"{self.FOLDER_PREFIX}{href}",
            'vod_name': (name or '').strip(),
            'vod_pic': pic,
            'vod_tag': 'folder',
            'style': {'ratio': ratio, 'type': 'rect'}
        }