Remove xpath

This commit is contained in:
jhengazuki 2025-08-05 13:44:03 +08:00
parent 37c2826312
commit d71a2bd83e
8 changed files with 1 additions and 1445 deletions

View File

@ -46,7 +46,6 @@ dependencies {
implementation 'com.github.thegrizzlylabs:sardine-android:0.9'
implementation 'wang.harlon.quickjs:wrapper-android:3.2.3'
implementation 'com.google.code.gson:gson:2.13.1'
implementation 'cn.wanghaomiao:JsoupXpath:2.5.3'
implementation 'com.hierynomus:smbj:0.14.0'
implementation 'com.orhanobut:logger:2.2.0'
implementation 'org.jsoup:jsoup:1.21.1'

View File

@ -1,695 +0,0 @@
package com.github.catvod.bean.xpath;
import com.github.catvod.crawler.SpiderDebug;
import org.json.JSONObject;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Rule {
/**
* user-agent
*/
private String ua;
/**
* 取得分類和首頁推薦的Url
*/
private String homeUrl;
/**
* 分類節點 xpath
*/
private String cateNode;
/**
* 分類節點名 xpath
*/
private String cateName;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern cateNameR;
/**
* 分類節點 id xpath
*/
private String cateId;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern cateIdR;
/**
* 手動指定分類如果有則不從 homeUrl 中獲取分類
*/
private final LinkedHashMap<String, String> cateManual = new LinkedHashMap<>();
/**
* 篩選
*/
private JSONObject filter;
/**
* 更新推薦影片節點 xpath
*/
private String homeVodNode;
/**
* 更新推薦影片名稱 xpath
*/
private String homeVodName;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern homeVodNameR;
/**
* 更新推薦影片 id xpath
*/
private String homeVodId;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern homeVodIdR;
/**
* 更新推薦影片圖片 xpath
*/
private String homeVodImg;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern homeVodImgR;
/**
* 更新推薦影片簡介 xpath
*/
private String homeVodMark;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern homeVodMarkR;
/**
* 分類頁地址
*/
private String cateUrl;
/**
* 分類頁影片節點 xpath
*/
private String cateVodNode;
/**
* 分類頁影片名稱 xpath
*/
private String cateVodName;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern cateVodNameR;
/**
* 分類頁影片影片id xpath
*/
private String cateVodId;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern cateVodIdR;
/**
* 分類頁影片影片圖片 xpath
*/
private String cateVodImg;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern cateVodImgR;
/**
* 分類頁影片影片簡介 xpath
*/
private String cateVodMark;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern cateVodMarkR;
/**
* 詳情頁面
*/
private String dtUrl;
/**
* 詳情節點 xpath
*/
private String dtNode;
/**
* 詳情影片 xpath
*/
private String dtName;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtNameR;
/**
* 詳情影片圖片 xpath
*/
private String dtImg;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtImgR;
/**
* 詳情影片分類 xpath
*/
private String dtCate;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtCateR;
/**
* 詳情影片年份 xpath
*/
private String dtYear;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtYearR;
/**
* 詳情影片地區 xpath
*/
private String dtArea;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtAreaR;
/**
* 詳情影片簡介 xpath
*/
private String dtMark;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtMarkR;
/**
* 詳情演員 xpath
*/
private String dtActor;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtActorR;
/**
* 詳情導演 xpath
*/
private String dtDirector;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtDirectorR;
/**
* 詳情說明 xpath
*/
private String dtDesc;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern dtDescR;
/**
* 詳情播放來源節點
*/
private String dtFromNode;
/**
* 詳情播放來源名稱 xpath
*/
private String dtFromName;
/**
* 詳情
*/
private Pattern dtFromNameR;
/**
* 詳情播放地址列表節點 xpath
*/
private String dtUrlNode;
/**
* 詳情播放地址節點 xpath
*/
private String dtUrlSubNode;
/**
* 詳情播放地址id xpath
*/
private String dtUrlId;
/**
* 詳情
*/
private Pattern dtUrlIdR;
/**
* 詳情播放地址名稱 xpath
*/
private String dtUrlName;
/**
* 詳情
*/
private Pattern dtUrlNameR;
/**
* 播放頁面url
*/
private String playUrl;
/**
* 播放解析調用ua
*/
private String playUa;
/**
* 播放解析調用referer
*/
private String playReferer;
/**
* 搜尋頁地址
*/
private String searchUrl;
/**
* 搜尋頁影片節點 xpath
*/
private String scVodNode;
/**
* 搜尋頁影片名稱 xpath
*/
private String scVodName;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern scVodNameR;
/**
* 搜尋頁影片id xpath
*/
private String scVodId;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern scVodIdR;
/**
* 搜尋頁影片圖片 xpath
*/
private String scVodImg;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern scVodImgR;
/**
* 搜尋頁影片簡介 xpath
*/
private String scVodMark;
/**
* 正則對取到的數據進行二次處理
*/
private Pattern scVodMarkR;
/**
 * Compiles the regular expression stored under {@code key} in {@code json}.
 *
 * @param json rule configuration object
 * @param key  name of the entry that holds the regular expression
 * @return the compiled pattern, or {@code null} when the entry is empty after trimming
 *         or cannot be compiled
 */
private static Pattern getPattern(JSONObject json, String key) {
    String regex = json.optString(key).trim();
    if (!regex.isEmpty()) {
        try {
            return Pattern.compile(regex);
        } catch (Exception e) {
            // An invalid expression is logged and then treated as "no pattern".
            SpiderDebug.log(e);
        }
    }
    return null;
}
/**
 * Extracts the first capture group of {@code pattern} from {@code src}.
 *
 * @param pattern optional post-processing pattern; {@code null} means "no post-processing"
 * @param src     raw text taken from the page
 * @return the trimmed content of capture group 1 of the first match, or {@code src}
 *         unchanged when there is no pattern, no match, or no usable group 1
 */
private static String doReplaceRegex(Pattern pattern, String src) {
    if (pattern == null || src == null) {
        return src;
    }
    try {
        Matcher matcher = pattern.matcher(src);
        // A pattern without any capture group, or a group that did not take part in the
        // match, falls back to the raw text instead of relying on a thrown exception.
        if (matcher.find() && matcher.groupCount() >= 1) {
            String captured = matcher.group(1);
            if (captured != null) {
                return captured.trim();
            }
        }
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return src;
}
/**
 * Builds a {@link Rule} from its JSON text.
 *
 * <p>String entries are read with {@code optString}; the home, category and search
 * entries are additionally trimmed, while the {@code ua}, detail ({@code dt*}),
 * play and {@code searchUrl} entries are stored as given. Every {@code *R} entry is
 * compiled through {@link #getPattern(JSONObject, String)}.
 *
 * @param json rule configuration as JSON text
 * @return the populated rule, or {@code null} when an exception is raised while reading
 *         the configuration (the exception is logged)
 */
public static Rule fromJson(String json) {
    try {
        JSONObject cfg = new JSONObject(json);
        Rule r = new Rule();

        // Request settings and category discovery.
        r.ua = cfg.optString("ua");
        r.homeUrl = trimmed(cfg, "homeUrl");
        r.cateNode = trimmed(cfg, "cateNode");
        r.cateName = trimmed(cfg, "cateName");
        r.cateNameR = getPattern(cfg, "cateNameR");
        r.cateId = trimmed(cfg, "cateId");
        r.cateIdR = getPattern(cfg, "cateIdR");
        JSONObject manual = cfg.optJSONObject("cateManual");
        if (manual != null) {
            for (Iterator<String> it = manual.keys(); it.hasNext(); ) {
                String label = it.next();
                r.cateManual.put(label.trim(), manual.getString(label).trim());
            }
        }
        r.filter = cfg.optJSONObject("filter");

        // Home page recommendations.
        r.homeVodNode = trimmed(cfg, "homeVodNode");
        r.homeVodName = trimmed(cfg, "homeVodName");
        r.homeVodNameR = getPattern(cfg, "homeVodNameR");
        r.homeVodId = trimmed(cfg, "homeVodId");
        r.homeVodIdR = getPattern(cfg, "homeVodIdR");
        r.homeVodImg = trimmed(cfg, "homeVodImg");
        r.homeVodImgR = getPattern(cfg, "homeVodImgR");
        r.homeVodMark = trimmed(cfg, "homeVodMark");
        r.homeVodMarkR = getPattern(cfg, "homeVodMarkR");

        // Category listing.
        r.cateUrl = trimmed(cfg, "cateUrl");
        r.cateVodNode = trimmed(cfg, "cateVodNode");
        r.cateVodName = trimmed(cfg, "cateVodName");
        r.cateVodNameR = getPattern(cfg, "cateVodNameR");
        r.cateVodId = trimmed(cfg, "cateVodId");
        r.cateVodIdR = getPattern(cfg, "cateVodIdR");
        r.cateVodImg = trimmed(cfg, "cateVodImg");
        r.cateVodImgR = getPattern(cfg, "cateVodImgR");
        r.cateVodMark = trimmed(cfg, "cateVodMark");
        r.cateVodMarkR = getPattern(cfg, "cateVodMarkR");

        // Detail page (values are stored untrimmed).
        r.dtUrl = cfg.optString("dtUrl");
        r.dtNode = cfg.optString("dtNode");
        r.dtName = cfg.optString("dtName");
        r.dtNameR = getPattern(cfg, "dtNameR");
        r.dtImg = cfg.optString("dtImg");
        r.dtImgR = getPattern(cfg, "dtImgR");
        r.dtCate = cfg.optString("dtCate");
        r.dtCateR = getPattern(cfg, "dtCateR");
        r.dtYear = cfg.optString("dtYear");
        r.dtYearR = getPattern(cfg, "dtYearR");
        r.dtArea = cfg.optString("dtArea");
        r.dtAreaR = getPattern(cfg, "dtAreaR");
        r.dtMark = cfg.optString("dtMark");
        r.dtMarkR = getPattern(cfg, "dtMarkR");
        r.dtActor = cfg.optString("dtActor");
        r.dtActorR = getPattern(cfg, "dtActorR");
        r.dtDirector = cfg.optString("dtDirector");
        r.dtDirectorR = getPattern(cfg, "dtDirectorR");
        r.dtDesc = cfg.optString("dtDesc");
        r.dtDescR = getPattern(cfg, "dtDescR");
        r.dtFromNode = cfg.optString("dtFromNode");
        r.dtFromName = cfg.optString("dtFromName");
        r.dtFromNameR = getPattern(cfg, "dtFromNameR");
        r.dtUrlNode = cfg.optString("dtUrlNode");
        r.dtUrlSubNode = cfg.optString("dtUrlSubNode");
        r.dtUrlId = cfg.optString("dtUrlId");
        r.dtUrlIdR = getPattern(cfg, "dtUrlIdR");
        r.dtUrlName = cfg.optString("dtUrlName");
        r.dtUrlNameR = getPattern(cfg, "dtUrlNameR");

        // Playback (values are stored untrimmed).
        r.playUrl = cfg.optString("playUrl");
        r.playUa = cfg.optString("playUa");
        r.playReferer = cfg.optString("playReferer");

        // Search.
        r.searchUrl = cfg.optString("searchUrl");
        r.scVodNode = trimmed(cfg, "scVodNode");
        r.scVodName = trimmed(cfg, "scVodName");
        r.scVodNameR = getPattern(cfg, "scVodNameR");
        r.scVodId = trimmed(cfg, "scVodId");
        r.scVodIdR = getPattern(cfg, "scVodIdR");
        r.scVodImg = trimmed(cfg, "scVodImg");
        r.scVodImgR = getPattern(cfg, "scVodImgR");
        r.scVodMark = trimmed(cfg, "scVodMark");
        r.scVodMarkR = getPattern(cfg, "scVodMarkR");
        return r;
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return null;
}

/** Reads {@code key} with {@code optString} and trims surrounding whitespace. */
private static String trimmed(JSONObject json, String key) {
    return json.optString(key).trim();
}
public String getUa() {
return ua;
}
public String getHomeUrl() {
return homeUrl;
}
public String getCateNode() {
return cateNode;
}
public String getCateName() {
return cateName;
}
public String getCateNameR(String src) {
return doReplaceRegex(cateNameR, src);
}
public String getCateId() {
return cateId;
}
public String getCateIdR(String src) {
return doReplaceRegex(cateIdR, src);
}
public LinkedHashMap<String, String> getCateManual() {
return cateManual;
}
public JSONObject getFilter() {
return filter;
}
public String getHomeVodNode() {
return homeVodNode;
}
public String getHomeVodName() {
return homeVodName;
}
public String getHomeVodNameR(String src) {
return doReplaceRegex(homeVodNameR, src);
}
public String getHomeVodId() {
return homeVodId;
}
public String getHomeVodIdR(String src) {
return doReplaceRegex(homeVodIdR, src);
}
public String getHomeVodImg() {
return homeVodImg;
}
public String getHomeVodImgR(String src) {
return doReplaceRegex(homeVodImgR, src);
}
public String getHomeVodMark() {
return homeVodMark;
}
public String getHomeVodMarkR(String src) {
return doReplaceRegex(homeVodMarkR, src);
}
public String getCateUrl() {
return cateUrl;
}
public String getCateVodNode() {
return cateVodNode;
}
public String getCateVodName() {
return cateVodName;
}
public String getCateVodNameR(String src) {
return doReplaceRegex(cateVodNameR, src);
}
public String getCateVodId() {
return cateVodId;
}
public String getCateVodIdR(String src) {
return doReplaceRegex(cateVodIdR, src);
}
public String getCateVodImg() {
return cateVodImg;
}
public String getCateVodImgR(String src) {
return doReplaceRegex(cateVodImgR, src);
}
public String getCateVodMark() {
return cateVodMark;
}
/**
 * Post-processes a category-page remark with the {@code cateVodMarkR} pattern.
 *
 * <p>Previously this applied {@code cateVodNameR} (the title pattern), so a configured
 * remark pattern was never used.
 *
 * @param src raw remark text taken from the category page
 * @return the processed remark, or {@code src} when no remark pattern applies
 */
public String getCateVodMarkR(String src) {
    return doReplaceRegex(cateVodMarkR, src);
}
public String getDetailUrl() {
return dtUrl;
}
public String getDetailNode() {
return dtNode;
}
public String getDetailName() {
return dtName;
}
public String getDetailNameR(String src) {
return doReplaceRegex(dtNameR, src);
}
public String getDetailImg() {
return dtImg;
}
public String getDetailImgR(String src) {
return doReplaceRegex(dtImgR, src);
}
public String getDetailCate() {
return dtCate;
}
public String getDetailCateR(String src) {
return doReplaceRegex(dtCateR, src);
}
public String getDetailYear() {
return dtYear;
}
public String getDetailYearR(String src) {
return doReplaceRegex(dtYearR, src);
}
public String getDetailArea() {
return dtArea;
}
public String getDetailAreaR(String src) {
return doReplaceRegex(dtAreaR, src);
}
public String getDetailMark() {
return dtMark;
}
public String getDetailMarkR(String src) {
return doReplaceRegex(dtMarkR, src);
}
public String getDetailActor() {
return dtActor;
}
public String getDetailActorR(String src) {
return doReplaceRegex(dtActorR, src);
}
public String getDetailDirector() {
return dtDirector;
}
public String getDetailDirectorR(String src) {
return doReplaceRegex(dtDirectorR, src);
}
public String getDetailDesc() {
return dtDesc;
}
public String getDetailDescR(String src) {
return doReplaceRegex(dtDescR, src);
}
public String getDetailFromNode() {
return dtFromNode;
}
public String getDetailFromName() {
return dtFromName;
}
public String getDetailFromNameR(String src) {
return doReplaceRegex(dtFromNameR, src);
}
public String getDetailUrlNode() {
return dtUrlNode;
}
public String getDetailUrlSubNode() {
return dtUrlSubNode;
}
public String getDetailUrlId() {
return dtUrlId;
}
public String getDetailUrlIdR(String src) {
return doReplaceRegex(dtUrlIdR, src);
}
public String getDetailUrlName() {
return dtUrlName;
}
public String getDetailUrlNameR(String src) {
return doReplaceRegex(dtUrlNameR, src);
}
public String getPlayUrl() {
return playUrl;
}
public String getPlayUa() {
return playUa;
}
public String getPlayReferer() {
return playReferer;
}
public String getSearchUrl() {
return searchUrl;
}
public String getSearchVodNode() {
return scVodNode;
}
public String getSearchVodName() {
return scVodName;
}
public String getSearchVodNameR(String src) {
return doReplaceRegex(scVodNameR, src);
}
public String getSearchVodId() {
return scVodId;
}
public String getSearchVodIdR(String src) {
return doReplaceRegex(scVodIdR, src);
}
public String getSearchVodImg() {
return scVodImg;
}
public String getSearchVodImgR(String src) {
return doReplaceRegex(scVodImgR, src);
}
public String getSearchVodMark() {
return scVodMark;
}
public String getSearchVodMarkR(String src) {
return doReplaceRegex(scVodMarkR, src);
}
}

View File

@ -1,138 +0,0 @@
package com.github.catvod.spider;
import com.github.catvod.bean.Class;
import com.github.catvod.bean.Filter;
import com.github.catvod.bean.Result;
import com.github.catvod.bean.Vod;
import com.github.catvod.crawler.Spider;
import com.github.catvod.net.OkHttp;
import com.github.catvod.utils.Util;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
public class Hanime extends Spider {
private static final String siteUrl = "https://hanime1.me";
private HashMap<String, String> getHeaders() {
HashMap<String, String> headers = new HashMap<>();
headers.put("User-Agent", Util.CHROME);
return headers;
}
private Filter getFilter(String name, String key, List<String> texts) {
List<Filter.Value> values = new ArrayList<>();
if (!key.equals("by")) values.add(new Filter.Value("全部", ""));
for (String text : texts) {
if (text.isEmpty()) continue;
values.add(new Filter.Value(text));
}
return new Filter(key, name, values);
}
@Override
public String homeContent(boolean filter) throws Exception {
List<Vod> list = new ArrayList<>();
List<Class> classes = new ArrayList<>();
LinkedHashMap<String, List<Filter>> filters = new LinkedHashMap<>();
Document doc1 = Jsoup.parse(OkHttp.string(siteUrl.concat("/search?genre=裏番"), getHeaders()));
List<String> sorts = doc1.select("div.hentai-sort-options-wrapper").eachText();
List<String> years = doc1.getElementById("year").select("option").eachAttr("value");
Document doc2 = Jsoup.parse(OkHttp.string(siteUrl, getHeaders()));
for (Element element : doc2.select("a.nav-item")) {
String text = element.text();
if (text.equals("新番預告") || text.equals("H漫畫")) continue;
classes.add(new Class(text));
List<Filter> array = new ArrayList<>();
array.add(getFilter("排序", "by", sorts));
array.add(getFilter("年份", "year", years));
filters.put(text, array);
}
for (Element element : doc2.select("a")) {
if (element.attr("href").contains("watch")) {
String pic = element.select("div > img").attr("src");
String url = element.attr("href");
String name = element.select("div > div").text();
String id = url.split("=")[1];
if (name.contains("smart_display") || name.isEmpty()) continue;
list.add(new Vod(id, name, pic));
}
}
return Result.string(classes, list, filters);
}
@Override
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) throws Exception {
List<Vod> list = new ArrayList<>();
if (extend.get("by") == null) extend.put("by", "最新上市");
if (extend.get("year") == null) extend.put("year", "");
String target = siteUrl.concat("/search?genre=").concat(tid).concat("&page=").concat(pg).concat("&sort=").concat(extend.get("by")).concat("&year=").concat(extend.get("year"));
Document doc = Jsoup.parse(OkHttp.string(target, getHeaders()));
for (Element element : doc.select("div.col-xs-6")) {
String pic = element.select("img").get(1).attr("src");
String url = element.select("a.overlay").attr("href");
String name = element.select("div.card-mobile-title").text();
String id = url.split("=")[1];
list.add(new Vod(id, name, pic));
}
if (list.isEmpty()) {
for (Element element : doc.select("a")) {
if (element.attr("href").contains("watch")) {
String pic = element.select("div > img").attr("src");
String url = element.attr("href");
String name = element.select("div > div").text();
String id = url.split("=")[1];
if (name.contains("smart_display")) continue;
list.add(new Vod(id, name, pic));
}
}
}
return Result.string(list);
}
@Override
public String detailContent(List<String> ids) throws Exception {
Document doc = Jsoup.parse(OkHttp.string(siteUrl.concat("/watch?v=").concat(ids.get(0)), getHeaders()));
String name = doc.getElementById("shareBtn-title").text();
JSONObject json = new JSONObject(doc.select("script[type=application/ld+json]").html().trim());
String content = json.optString("description");
String pic = json.optJSONArray("thumbnailUrl").optString(0);
String url = json.optString("contentUrl");;
Vod vod = new Vod();
vod.setVodId(ids.get(0));
vod.setVodPic(pic);
vod.setVodName(name);
vod.setVodContent(content);
vod.setVodPlayFrom("Hanime1");
vod.setVodPlayUrl("播放$" + url);
return Result.string(vod);
}
@Override
public String searchContent(String key, boolean quick) throws Exception {
List<Vod> list = new ArrayList<>();
String target = siteUrl.concat("/search?query=").concat(key).concat("&genre=&sort=&year=&month=&duration=");
Document doc = Jsoup.parse(OkHttp.string(target, getHeaders()));
for (Element element : doc.select("div.col-xs-6")) {
String pic = element.select("img").get(1).attr("src");
String url = element.select("a.overlay").attr("href");
String name = element.select("div.card-mobile-title").text();
String id = url.split("=")[1];
list.add(new Vod(id, name, pic));
}
return Result.string(list);
}
@Override
public String playerContent(String flag, String id, List<String> vipFlags) throws Exception {
return Result.get().url(id).header(getHeaders()).string();
}
}

View File

@ -1,345 +0,0 @@
package com.github.catvod.spider;
import android.content.Context;
import android.text.TextUtils;
import com.github.catvod.bean.Class;
import com.github.catvod.bean.Result;
import com.github.catvod.bean.Vod;
import com.github.catvod.bean.xpath.Rule;
import com.github.catvod.crawler.Spider;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.net.OkHttp;
import com.github.catvod.utils.Util;
import org.json.JSONArray;
import org.json.JSONObject;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
public class XPath extends Spider {
/** Builds the headers for page fetches: the rule's User-Agent, or {@code Util.CHROME} when the rule defines none. */
private HashMap<String, String> getHeaders() {
    String agent = rule.getUa();
    if (agent.isEmpty()) {
        agent = Util.CHROME;
    }
    HashMap<String, String> result = new HashMap<>();
    result.put("User-Agent", agent);
    return result;
}
/**
 * Stores the extension string; it is turned into a {@code Rule} later by {@code fetchRule()}.
 *
 * @param context unused here
 * @param extend  rule JSON text, or a URL (starting with "http") that serves it
 */
@Override
public void init(Context context, String extend) {
    ext = extend;
}
/**
 * Builds the home response: categories (manual ones from the rule, otherwise scraped
 * from the home page) plus the recommended videos when the rule defines a node for them.
 */
@Override
public String homeContent(boolean filter) {
    fetchRule();
    List<Class> classes = new ArrayList<>();
    boolean hasManual = !rule.getCateManual().isEmpty();
    if (hasManual) {
        // The map is keyed by display name; its values are the category ids.
        for (String label : rule.getCateManual().keySet()) {
            classes.add(new Class(rule.getCateManual().get(label), label));
        }
    }
    String webUrl = rule.getHomeUrl();
    JXDocument doc = JXDocument.create(fetch(webUrl));
    if (!hasManual) {
        for (JXNode nav : doc.selN(rule.getCateNode())) {
            String name = rule.getCateNameR(nav.selOne(rule.getCateName()).asString().trim());
            String id = rule.getCateIdR(nav.selOne(rule.getCateId()).asString().trim());
            classes.add(new Class(id, name));
        }
    }
    List<Vod> list = new ArrayList<>();
    if (!rule.getHomeVodNode().isEmpty()) {
        for (JXNode node : doc.selN(rule.getHomeVodNode())) {
            String name = rule.getHomeVodNameR(node.selOne(rule.getHomeVodName()).asString().trim());
            String id = rule.getHomeVodIdR(node.selOne(rule.getHomeVodId()).asString().trim());
            String pic = rule.getHomeVodImgR(node.selOne(rule.getHomeVodImg()).asString().trim());
            pic = Util.fixUrl(webUrl, pic);
            String mark = "";
            if (!rule.getHomeVodMark().isEmpty()) {
                // The remark is optional: a failed lookup is logged and leaves it empty.
                try {
                    mark = node.selOne(rule.getHomeVodMark()).asString().trim();
                    mark = rule.getHomeVodMarkR(mark);
                } catch (Exception e) {
                    SpiderDebug.log(e);
                }
            }
            list.add(new Vod(id, name, pic, mark));
        }
    }
    return Result.string(classes, list, rule.getFilter());
}
/**
 * Expands the rule's category URL template.
 *
 * @param tid    value substituted for {@code {cateId}}
 * @param pg     value substituted for {@code {catePg}}
 * @param filter unused in this implementation
 * @param extend unused in this implementation
 * @return the category page URL
 */
protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) {
    String template = rule.getCateUrl();
    String withCategory = template.replace("{cateId}", tid);
    return withCategory.replace("{catePg}", pg);
}
/** Fetches one category page (URL from {@code categoryUrl}) and extracts its video entries. */
@Override
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) {
    fetchRule();
    String webUrl = categoryUrl(tid, pg, filter, extend);
    JXDocument doc = JXDocument.create(fetch(webUrl));
    List<Vod> list = new ArrayList<>();
    for (JXNode node : doc.selN(rule.getCateVodNode())) {
        String name = rule.getCateVodNameR(node.selOne(rule.getCateVodName()).asString().trim());
        String id = rule.getCateVodIdR(node.selOne(rule.getCateVodId()).asString().trim());
        String pic = rule.getCateVodImgR(node.selOne(rule.getCateVodImg()).asString().trim());
        pic = Util.fixUrl(webUrl, pic);
        String mark = "";
        if (!rule.getCateVodMark().isEmpty()) {
            // The remark is optional: a failed lookup is logged and leaves it empty.
            try {
                mark = node.selOne(rule.getCateVodMark()).asString().trim();
                mark = rule.getCateVodMarkR(mark);
            } catch (Exception e) {
                SpiderDebug.log(e);
            }
        }
        list.add(new Vod(id, name, pic, mark));
    }
    return Result.string(list);
}
/**
 * Loads the detail page of {@code ids.get(0)} and builds its {@code Vod}: mandatory
 * title and cover, optional metadata, and the play sources with their episode lists.
 */
@Override
public String detailContent(List<String> ids) {
    fetchRule();
    String vid = ids.get(0);
    String webUrl = rule.getDetailUrl().replace("{vid}", vid);
    JXDocument doc = JXDocument.create(fetch(webUrl));
    JXNode vodNode = doc.selNOne(rule.getDetailNode());

    // Title and cover are mandatory: a failed lookup propagates to the caller.
    String title = rule.getDetailNameR(vodNode.selOne(rule.getDetailName()).asString().trim());
    String cover = rule.getDetailImgR(vodNode.selOne(rule.getDetailImg()).asString().trim());
    cover = Util.fixUrl(webUrl, cover);

    // Optional fields stay empty when not configured or when the lookup fails.
    String raw = optionalText(vodNode, rule.getDetailCate());
    String category = raw == null ? "" : rule.getDetailCateR(raw);
    raw = optionalText(vodNode, rule.getDetailYear());
    String year = raw == null ? "" : rule.getDetailYearR(raw);
    raw = optionalText(vodNode, rule.getDetailArea());
    String area = raw == null ? "" : rule.getDetailAreaR(raw);
    raw = optionalText(vodNode, rule.getDetailMark());
    String remark = raw == null ? "" : rule.getDetailMarkR(raw);
    raw = optionalText(vodNode, rule.getDetailActor());
    String actor = raw == null ? "" : rule.getDetailActorR(raw);
    raw = optionalText(vodNode, rule.getDetailDirector());
    String director = raw == null ? "" : rule.getDetailDirectorR(raw);
    raw = optionalText(vodNode, rule.getDetailDesc());
    String desc = raw == null ? "" : rule.getDetailDescR(raw);

    Vod vod = new Vod();
    vod.setVodId(vid);
    vod.setVodName(title);
    vod.setVodPic(cover);
    vod.setTypeName(category);
    vod.setVodYear(year);
    vod.setVodArea(area);
    vod.setVodRemarks(remark);
    vod.setVodActor(actor);
    vod.setVodDirector(director);
    vod.setVodContent(desc);

    ArrayList<String> playFrom = new ArrayList<>();
    for (JXNode fromNode : doc.selN(rule.getDetailFromNode())) {
        String name = fromNode.selOne(rule.getDetailFromName()).asString().trim();
        playFrom.add(rule.getDetailFromNameR(name));
    }

    ArrayList<String> playList = new ArrayList<>();
    List<JXNode> urlListNodes = doc.selN(rule.getDetailUrlNode());
    for (int i = 0; i < urlListNodes.size(); i++) {
        List<String> vodItems = new ArrayList<>();
        for (JXNode urlNode : urlListNodes.get(i).sel(rule.getDetailUrlSubNode())) {
            String name = rule.getDetailUrlNameR(urlNode.selOne(rule.getDetailUrlName()).asString().trim());
            String id = rule.getDetailUrlIdR(urlNode.selOne(rule.getDetailUrlId()).asString().trim());
            vodItems.add(name + "$" + id);
        }
        // Blank the name of a source whose episode list is empty so both get dropped below.
        if (vodItems.isEmpty() && playFrom.size() > i) {
            playFrom.set(i, "");
        }
        playList.add(TextUtils.join("#", vodItems));
    }

    // Drop sources without episodes, empty episode lists, and lists that have no source name.
    while (playFrom.remove("")) {
        // keep removing until no empty name is left
    }
    while (playList.remove("")) {
        // keep removing until no empty list is left
    }
    while (playList.size() > playFrom.size()) {
        playList.remove(playList.size() - 1);
    }

    vod.setVodPlayFrom(TextUtils.join("$$$", playFrom));
    vod.setVodPlayUrl(TextUtils.join("$$$", playList));
    return Result.string(vod);
}

/**
 * Reads the trimmed text selected by {@code xpath} below {@code node}.
 *
 * @return the text, or {@code null} when {@code xpath} is empty or the lookup fails
 *         (the failure is logged)
 */
private String optionalText(JXNode node, String xpath) {
    if (xpath.isEmpty()) {
        return null;
    }
    try {
        return node.selOne(xpath).asString().trim();
    } catch (Exception e) {
        SpiderDebug.log(e);
        return null;
    }
}
/**
 * Builds the play response for an episode id: the id itself, or the rule's play URL
 * template with {@code {playUrl}} replaced, plus the optional play User-Agent and Referer.
 */
@Override
public String playerContent(String flag, String id, List<String> vipFlags) {
    fetchRule();
    String webUrl;
    if (rule.getPlayUrl().isEmpty()) {
        webUrl = id;
    } else {
        webUrl = rule.getPlayUrl().replace("{playUrl}", id);
    }
    SpiderDebug.log(webUrl);
    HashMap<String, String> headers = new HashMap<>();
    String agent = rule.getPlayUa();
    if (!agent.isEmpty()) {
        headers.put("User-Agent", agent);
    }
    String referer = rule.getPlayReferer();
    if (!referer.isEmpty()) {
        headers.put("Referer", referer);
    }
    return Result.get().parse().url(webUrl).header(headers).string();
}
/**
 * Searches the site for {@code key}.
 *
 * <p>When the rule's search node starts with {@code "json:"} the response is treated as
 * JSON and the remainder of the node is a {@code >}-separated path to the result array;
 * otherwise the response is queried with XPath.
 *
 * @param key   search keyword; URL-encoded as UTF-8 before it replaces {@code {wd}}
 * @param quick unused
 * @return the result list, or an empty string when the rule has no search URL
 * @throws Exception when the JSON response cannot be parsed or a mandatory XPath lookup fails
 */
@Override
public String searchContent(String key, boolean quick) throws Exception {
    fetchRule();
    if (rule.getSearchUrl().isEmpty()) return "";
    String webUrl = rule.getSearchUrl().replace("{wd}", URLEncoder.encode(key, "UTF-8"));
    String webContent = fetch(webUrl);
    List<Vod> list = new ArrayList<>();
    if (rule.getSearchVodNode().startsWith("json:")) {
        String[] node = rule.getSearchVodNode().substring(5).split(">");
        JSONObject data = new JSONObject(webContent);
        for (int i = 0; i < node.length; i++) {
            if (i < node.length - 1) {
                // Intermediate path segments are nested objects.
                data = data.getJSONObject(node[i]);
                continue;
            }
            // The last path segment names the result array.
            JSONArray vodArray = data.getJSONArray(node[i]);
            for (int j = 0; j < vodArray.length(); j++) {
                JSONObject vod = vodArray.getJSONObject(j);
                String name = rule.getSearchVodNameR(vod.optString(rule.getSearchVodName()).trim());
                String id = rule.getSearchVodIdR(vod.optString(rule.getSearchVodId()).trim());
                String pic = rule.getSearchVodImgR(vod.optString(rule.getSearchVodImg()).trim());
                pic = Util.fixUrl(webUrl, pic);
                String mark = rule.getSearchVodMarkR(vod.optString(rule.getSearchVodMark()).trim());
                list.add(new Vod(id, name, pic, mark));
            }
        }
    } else {
        JXDocument doc = JXDocument.create(webContent);
        for (JXNode vodNode : doc.selN(rule.getSearchVodNode())) {
            String name = rule.getSearchVodNameR(vodNode.selOne(rule.getSearchVodName()).asString().trim());
            String id = rule.getSearchVodIdR(vodNode.selOne(rule.getSearchVodId()).asString().trim());
            String pic = rule.getSearchVodImgR(vodNode.selOne(rule.getSearchVodImg()).asString().trim());
            pic = Util.fixUrl(webUrl, pic);
            String mark = "";
            // Guard on the SEARCH remark rule (the category remark rule was checked here before,
            // which skipped or attempted the lookup based on an unrelated setting).
            if (!rule.getSearchVodMark().isEmpty()) {
                try {
                    mark = vodNode.selOne(rule.getSearchVodMark()).asString().trim();
                    mark = rule.getSearchVodMarkR(mark);
                } catch (Exception e) {
                    SpiderDebug.log(e);
                }
            }
            list.add(new Vod(id, name, pic, mark));
        }
    }
    return Result.string(list);
}
/** Always returns {@code false}. */
@Override
public boolean manualVideoCheck() {
    final boolean manual = false;
    return manual;
}
/**
 * Delegates the check to {@code Util.isVideoFormat}.
 *
 * @param url URL to test
 * @return whatever {@code Util.isVideoFormat(url)} reports
 */
@Override
public boolean isVideoFormat(String url) {
    boolean video = Util.isVideoFormat(url);
    return video;
}
protected String ext = null;
protected Rule rule = null;
/**
 * Parses the rule on first use. Does nothing when a rule is already present or no
 * extension string was supplied. An extension starting with "http" is downloaded first;
 * anything else is taken as the rule JSON itself. The same JSON text is then handed to
 * {@code loadRuleExt}.
 */
protected void fetchRule() {
    if (rule != null || ext == null) {
        return;
    }
    String json = ext.startsWith("http") ? OkHttp.string(ext, null) : ext;
    rule = Rule.fromJson(json);
    loadRuleExt(json);
}
/**
 * Hook called by {@code fetchRule()} with the rule JSON text; empty in this class.
 *
 * @param json rule configuration as JSON text
 */
protected void loadRuleExt(String json) {
    // intentionally empty
}
/**
 * Logs {@code webUrl} and downloads it with the headers from {@code getHeaders()}.
 *
 * @param webUrl page address
 * @return the response body as returned by {@code OkHttp.string}
 */
protected String fetch(String webUrl) {
    SpiderDebug.log(webUrl);
    HashMap<String, String> headers = getHeaders();
    return OkHttp.string(webUrl, headers);
}
}

View File

@ -1,36 +0,0 @@
package com.github.catvod.spider;
import android.text.TextUtils;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * XPath spider whose category URL template may contain filter placeholders such as
 * {@code {area}} in addition to {@code {cateId}} and {@code {catePg}}.
 */
public class XPathFilter extends XPath {

    /** Matches a {@code {placeholder}}; compiled once instead of on every call. */
    private static final Pattern PLACEHOLDER = Pattern.compile("\\{(.*?)\\}");

    @Override
    protected void loadRuleExt(String json) {
        super.loadRuleExt(json);
    }

    /** URL-encodes a filter value explicitly as UTF-8. */
    private static String encode(String value) {
        try {
            return URLEncoder.encode(value, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform must provide.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Expands the category URL template.
     *
     * <p>Non-empty filter values replace their {@code {key}} placeholders (only when
     * {@code filter} is set), then {@code {cateId}} and {@code {catePg}} are filled in.
     * Every placeholder still left is removed, together with any {@code /name/} path
     * segment named after it.
     *
     * @param tid    category id
     * @param pg     page number
     * @param filter whether filter values should be applied
     * @param extend filter values keyed by placeholder name; may be {@code null}
     * @return the category page URL
     */
    @Override
    protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) {
        String cateUrl = rule.getCateUrl();
        if (filter && extend != null && !extend.isEmpty()) {
            for (String key : extend.keySet()) {
                String value = extend.get(key);
                if (!TextUtils.isEmpty(value)) {
                    cateUrl = cateUrl.replace("{" + key + "}", encode(value));
                }
            }
        }
        cateUrl = cateUrl.replace("{cateId}", tid).replace("{catePg}", pg);
        // The matcher scans a snapshot of the URL, so editing cateUrl inside the loop is safe.
        Matcher m = PLACEHOLDER.matcher(cateUrl);
        while (m.find()) {
            String n = m.group(0).replace("{", "").replace("}", "");
            cateUrl = cateUrl.replace(m.group(0), "").replace("/" + n + "/", "");
        }
        return cateUrl;
    }
}

View File

@ -1,196 +0,0 @@
package com.github.catvod.spider;
import android.content.Context;
import android.text.TextUtils;
import android.util.Base64;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.utils.Util;
import com.google.gson.Gson;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class XPathMac extends XPath {
// 嘗試分析直連
private boolean decodePlayUrl;
// 嘗試匹配官源標識以調用應用配置中的解析列表
private boolean decodeVipFlag;
// 播放器配置js
private String playerConfigJs = "";
// 播放器配置js取值正則
private String playerConfigJsRegex = "[\\W|\\S|.]*?MacPlayerConfig.player_list[\\W|\\S|.]*?=([\\W|\\S|.]*?),MacPlayerConfig.downer_list";
// 站點里播放源對應的真實官源
private final HashMap<String, String> show2VipFlag = new HashMap<>();
/**
 * Initialises the spider by delegating to the parent implementation.
 *
 * <p>Translated from the original note: this class adds MacCMS support for direct play
 * links and for using the app's own parser list with official sources.
 *
 * @param context passed through to the parent
 * @param extend  passed through to the parent
 */
@Override
public void init(Context context, String extend) {
    super.init(context, extend);
}
/**
 * Reads the MacCMS-specific options from the rule JSON: {@code dcPlayUrl},
 * {@code dcVipFlag}, the {@code dcShow2Vip} name mapping, {@code pCfgJs} and
 * {@code pCfgJsR} (which keeps its current value when absent). A JSON error is logged
 * and leaves the options that were not yet read unchanged.
 *
 * @param json rule configuration as JSON text
 */
@Override
protected void loadRuleExt(String json) {
    try {
        JSONObject cfg = new JSONObject(json);
        decodePlayUrl = cfg.optBoolean("dcPlayUrl", false);
        decodeVipFlag = cfg.optBoolean("dcVipFlag", false);
        JSONObject mapping = cfg.optJSONObject("dcShow2Vip");
        if (mapping != null) {
            for (Iterator<String> it = mapping.keys(); it.hasNext(); ) {
                String shown = it.next();
                show2VipFlag.put(shown.trim(), mapping.getString(shown).trim());
            }
        }
        playerConfigJs = cfg.optString("pCfgJs").trim();
        playerConfigJsRegex = cfg.optString("pCfgJsR", playerConfigJsRegex).trim();
    } catch (JSONException e) {
        SpiderDebug.log(e);
    }
}
/**
 * Returns the parent's home response and, when a player-config script is configured,
 * tries to learn the "display name to flag" mapping from it.
 *
 * <p>The mapping is best-effort: a failed download, an invalid {@code pCfgJsR}
 * expression or unparsable JSON is logged and never affects the returned response.
 */
@Override
public String homeContent(boolean filter) {
    String result = super.homeContent(filter);
    if (result.isEmpty() || playerConfigJs.isEmpty()) {
        return result;
    }
    try {
        // Fetching and compiling the configurable regex happen inside the try block so
        // that a bad configuration cannot discard the already built home response.
        String webContent = fetch(playerConfigJs);
        Matcher matcher = Pattern.compile(playerConfigJsRegex).matcher(webContent);
        if (matcher.find()) {
            JSONObject jsonObject = new JSONObject(matcher.group(1));
            Iterator<String> keys = jsonObject.keys();
            while (keys.hasNext()) {
                String key = keys.next();
                JSONObject keyObj = jsonObject.optJSONObject(key);
                if (keyObj == null) continue;
                String show = keyObj.optString("show").trim();
                if (show.isEmpty()) continue;
                show2VipFlag.put(show, key);
            }
        }
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return result;
}
@Override
public String detailContent(List<String> ids) {
String result = super.detailContent(ids);
if (decodeVipFlag && !result.isEmpty()) {
try {
JSONObject jsonObject = new JSONObject(result);
String[] playFrom = jsonObject.optJSONArray("list").getJSONObject(0).optString("vod_play_from").split("\\$\\$\\$");
if (playFrom.length > 0) {
for (int i = 0; i < playFrom.length; i++) {
if (show2VipFlag.containsKey(playFrom[i])) {
playFrom[i] = show2VipFlag.get(playFrom[i]);
}
}
jsonObject.optJSONArray("list").getJSONObject(0).put("vod_play_from", TextUtils.join("$$$", playFrom));
result = jsonObject.toString();
}
} catch (Throwable th) {
SpiderDebug.log(th);
}
}
return result;
}
@Override
public String playerContent(String flag, String id, List<String> vipFlags) {
fetchRule();
String webUrl = rule.getPlayUrl().isEmpty() ? id : rule.getPlayUrl().replace("{playUrl}", id);
String videoUrl = null;
// 嘗試分析直連
if (decodePlayUrl) {
try {
Document doc = Jsoup.parse(fetch(webUrl));
Elements allScript = doc.select("script");
for (int i = 0; i < allScript.size(); i++) {
String scContent = allScript.get(i).html().trim();
if (scContent.startsWith("var player_")) {
int start = scContent.indexOf('{');
int end = scContent.lastIndexOf('}') + 1;
String json = scContent.substring(start, end);
JSONObject player = new JSONObject(json);
String videoUrlTmp = player.getString("url");
if (player.has("encrypt")) {
int encrypt = player.getInt("encrypt");
if (encrypt == 1) {
videoUrlTmp = URLDecoder.decode(videoUrlTmp);
} else if (encrypt == 2) {
videoUrlTmp = new String(Base64.decode(videoUrlTmp, Base64.DEFAULT));
videoUrlTmp = URLDecoder.decode(videoUrlTmp);
}
}
videoUrl = videoUrlTmp;
break;
}
}
} catch (Exception e) {
SpiderDebug.log(e);
}
}
if (videoUrl != null) {
// 適配2.0.6的調用應用內解析列表的支持, 需要配合直連分析和匹配官源解析一起使用參考cjt影視和極品直連
if (decodeVipFlag && Util.isVip(videoUrl)) { // 使用jx:1
try {
JSONObject result = new JSONObject();
result.put("parse", 1);
result.put("jx", "1");
result.put("url", videoUrl);
return result.toString();
} catch (Exception e) {
SpiderDebug.log(e);
}
} else if (decodeVipFlag && vipFlags.contains(flag)) { // 是否使用應用內解析列表解析官源
try {
JSONObject result = new JSONObject();
result.put("parse", 1);
result.put("playUrl", "");
result.put("url", videoUrl);
result.put("header", "");
return result.toString();
} catch (Exception e) {
SpiderDebug.log(e);
}
}
// 如果是視頻直連 直接返回免解
else if (isVideoFormat(videoUrl)) {
try {
JSONObject result = new JSONObject();
result.put("parse", 0);
result.put("playUrl", "");
result.put("url", videoUrl);
HashMap<String, String> headers = new HashMap<>();
if (!rule.getPlayUa().isEmpty()) headers.put("User-Agent", rule.getPlayUa());
if (!rule.getPlayReferer().isEmpty()) headers.put("Referer", rule.getPlayReferer());
result.put("header", new Gson().toJson(headers));
return result.toString();
} catch (Exception e) {
SpiderDebug.log(e);
}
}
}
// 上述都失敗了就按默認模式走
return super.playerContent(flag, id, vipFlags);
}
}

View File

@ -1,31 +0,0 @@
package com.github.catvod.spider;
import android.text.TextUtils;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * {@link XPathMac} variant whose category url is a template with {@code {name}}
 * placeholders that are filled from the selected filter values.
 */
public class XPathMacFilter extends XPathMac {

    /** Matches any {@code {name}} placeholder; compiled once instead of on every call. */
    private static final Pattern PLACEHOLDER = Pattern.compile("\\{(.*?)\\}");

    /**
     * Expands the rule's category url template.
     *
     * <p>When filtering is on, each non-empty filter value replaces its {@code {key}}
     * placeholder, url-encoded as UTF-8. {@code {cateId}} and {@code {catePg}} are then
     * replaced by {@code tid} and {@code pg}. Every placeholder still left is removed,
     * together with any {@code /name/} path segment carrying the placeholder's name.
     *
     * @param tid    category id substituted for {@code {cateId}}
     * @param pg     page substituted for {@code {catePg}}
     * @param filter whether the filter values in {@code extend} should be applied
     * @param extend filter key to selected value; may be {@code null} or empty
     * @return the expanded category url
     */
    @Override
    protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) {
        String cateUrl = rule.getCateUrl();
        if (filter && extend != null && !extend.isEmpty()) {
            for (String key : extend.keySet()) {
                String value = extend.get(key);
                if (TextUtils.isEmpty(value)) continue;
                cateUrl = cateUrl.replace("{" + key + "}", encode(value));
            }
        }
        cateUrl = cateUrl.replace("{cateId}", tid).replace("{catePg}", pg);
        // The matcher keeps scanning the string it was created on, so reassigning cateUrl
        // inside the loop is safe.
        Matcher leftover = PLACEHOLDER.matcher(cateUrl);
        while (leftover.find()) {
            String placeholder = leftover.group(0);
            String name = placeholder.replace("{", "").replace("}", "");
            cateUrl = cateUrl.replace(placeholder, "").replace("/" + name + "/", "");
        }
        return cateUrl;
    }

    /**
     * Url-encodes a filter value as UTF-8 instead of relying on the platform default
     * charset used by the deprecated single-argument overload.
     */
    private static String encode(String value) {
        try {
            return URLEncoder.encode(value, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // Not expected in practice: every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }
}

View File

@ -53,9 +53,7 @@ public class YHDM extends Spider {
List<Class> classes = new ArrayList<>();
List<String> typeIds = Arrays.asList("guochandongman", "ribendongman", "dongmandianying", "omeidongman");
List<String> typeNames = Arrays.asList("国产动漫", "日本动漫", "动漫电影", "欧美动漫");
for (int i = 0; i < typeIds.size(); i++)
classes.add(new Class(typeIds.get(i), typeNames.get(i)));
for (int i = 0; i < typeIds.size(); i++) classes.add(new Class(typeIds.get(i), typeNames.get(i)));
Document doc = Jsoup.parse(OkHttp.string(siteUrl, getHeader()));
List<Vod> list = new ArrayList<>();
for (Element li : doc.select(".stui-vodlist.clearfix .myui-vodlist__box")) {