From d71a2bd83ea7638d81ea071db4bdde011df05e92 Mon Sep 17 00:00:00 2001 From: jhengazuki Date: Tue, 5 Aug 2025 13:44:03 +0800 Subject: [PATCH] Remove xpath --- app/build.gradle | 1 - .../com/github/catvod/bean/xpath/Rule.java | 695 ------------------ .../java/com/github/catvod/spider/Hanime.java | 138 ---- .../java/com/github/catvod/spider/XPath.java | 345 --------- .../com/github/catvod/spider/XPathFilter.java | 36 - .../com/github/catvod/spider/XPathMac.java | 196 ----- .../github/catvod/spider/XPathMacFilter.java | 31 - .../java/com/github/catvod/spider/YHDM.java | 4 +- 8 files changed, 1 insertion(+), 1445 deletions(-) delete mode 100644 app/src/main/java/com/github/catvod/bean/xpath/Rule.java delete mode 100644 app/src/main/java/com/github/catvod/spider/Hanime.java delete mode 100644 app/src/main/java/com/github/catvod/spider/XPath.java delete mode 100644 app/src/main/java/com/github/catvod/spider/XPathFilter.java delete mode 100644 app/src/main/java/com/github/catvod/spider/XPathMac.java delete mode 100644 app/src/main/java/com/github/catvod/spider/XPathMacFilter.java diff --git a/app/build.gradle b/app/build.gradle index 31f7ee1d..2d5132e8 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -46,7 +46,6 @@ dependencies { implementation 'com.github.thegrizzlylabs:sardine-android:0.9' implementation 'wang.harlon.quickjs:wrapper-android:3.2.3' implementation 'com.google.code.gson:gson:2.13.1' - implementation 'cn.wanghaomiao:JsoupXpath:2.5.3' implementation 'com.hierynomus:smbj:0.14.0' implementation 'com.orhanobut:logger:2.2.0' implementation 'org.jsoup:jsoup:1.21.1' diff --git a/app/src/main/java/com/github/catvod/bean/xpath/Rule.java b/app/src/main/java/com/github/catvod/bean/xpath/Rule.java deleted file mode 100644 index 78c9f326..00000000 --- a/app/src/main/java/com/github/catvod/bean/xpath/Rule.java +++ /dev/null @@ -1,695 +0,0 @@ -package com.github.catvod.bean.xpath; - -import com.github.catvod.crawler.SpiderDebug; - -import org.json.JSONObject; - -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class Rule { - /** - * user-agent - */ - private String ua; - /** - * 取得分類和首頁推薦的Url - */ - private String homeUrl; - /** - * 分類節點 xpath - */ - private String cateNode; - /** - * 分類節點名 xpath - */ - private String cateName; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern cateNameR; - /** - * 分類節點 id xpath - */ - private String cateId; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern cateIdR; - /** - * 手動指定分類如果有則不從 homeUrl 中獲取分類 - */ - private final LinkedHashMap cateManual = new LinkedHashMap<>(); - - /** - * 篩選 - */ - private JSONObject filter; - - /** - * 更新推薦影片節點 xpath - */ - private String homeVodNode; - /** - * 更新推薦影片名稱 xpath - */ - private String homeVodName; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern homeVodNameR; - /** - * 更新推薦影片 id xpath - */ - private String homeVodId; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern homeVodIdR; - /** - * 更新推薦影片圖片 xpath - */ - private String homeVodImg; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern homeVodImgR; - /** - * 更新推薦影片簡介 xpath - */ - private String homeVodMark; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern homeVodMarkR; - /** - * 分類頁地址 - */ - private String cateUrl; - /** - * 分類頁影片節點 xpath - */ - private String cateVodNode; - /** - * 分類頁影片名稱 xpath - */ - private String cateVodName; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern cateVodNameR; - /** - * 分類頁影片影片id xpath - */ - private String cateVodId; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern cateVodIdR; - /** - * 分類頁影片影片圖片 xpath - */ - private String cateVodImg; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern cateVodImgR; - /** - * 分類頁影片影片簡介 xpath - */ - private String cateVodMark; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern cateVodMarkR; - - /** - * 詳情頁面 - */ - private String dtUrl; - /** - * 詳情節點 xpath - */ - private String dtNode; - /** - * 詳情影片 xpath - */ - private String dtName; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtNameR; - /** - * 詳情影片圖片 xpath - */ - private String dtImg; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtImgR; - /** - * 詳情影片分類 xpath - */ - private String dtCate; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtCateR; - /** - * 詳情影片年份 xpath - */ - private String dtYear; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtYearR; - /** - * 詳情影片地區 xpath - */ - private String dtArea; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtAreaR; - /** - * 詳情影片簡介 xpath - */ - private String dtMark; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtMarkR; - /** - * 詳情演員 xpath - */ - private String dtActor; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtActorR; - /** - * 詳情導演 xpath - */ - private String dtDirector; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtDirectorR; - /** - * 詳情說明 xpath - */ - private String dtDesc; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern dtDescR; - - /** - * 詳情播放來源節點 - */ - private String dtFromNode; - /** - * 詳情播放來源名稱 xpath - */ - private String dtFromName; - /** - * 詳情 - */ - private Pattern dtFromNameR; - /** - * 詳情播放地址列表節點 xpath - */ - private String dtUrlNode; - /** - * 詳情播放地址節點 xpath - */ - private String dtUrlSubNode; - /** - * 詳情播放地址id xpath - */ - private String dtUrlId; - /** - * 詳情 - */ - private Pattern dtUrlIdR; - /** - * 詳情播放地址名稱 xpath - */ - private String dtUrlName; - /** - * 詳情 - */ - private Pattern dtUrlNameR; - /** - * 播放頁面url - */ - private String playUrl; - /** - * 播放解析調用ua - */ - private String playUa; - /** - * 播放解析調用referer - */ - private String playReferer; - - /** - * 搜尋頁地址 - */ - private String searchUrl; - - /** - * 搜尋頁影片節點 xpath - */ - private String scVodNode; - /** - * 搜尋頁影片名稱 xpath - */ - private String scVodName; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern scVodNameR; - /** - * 搜尋頁影片id xpath - */ - private String scVodId; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern scVodIdR; - /** - * 搜尋頁影片圖片 xpath - */ - private String scVodImg; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern scVodImgR; - /** - * 搜尋頁影片簡介 xpath - */ - private String scVodMark; - /** - * 正則對取到的數據進行二次處理 - */ - private Pattern scVodMarkR; - - private static Pattern getPattern(JSONObject json, String key) { - String v = json.optString(key).trim(); - if (v.isEmpty()) - return null; - else { - try { - return Pattern.compile(v); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - return null; - } - - private static String doReplaceRegex(Pattern pattern, String src) { - if (pattern == null) - return src; - try { - Matcher matcher = pattern.matcher(src); - if (matcher.find()) { - return matcher.group(1).trim(); - } - } catch (Exception e) { - SpiderDebug.log(e); - } - return src; - } - - public static Rule fromJson(String json) { - try { - JSONObject jsonObj = new JSONObject(json); - Rule rule = new Rule(); - rule.ua = jsonObj.optString("ua"); - rule.homeUrl = jsonObj.optString("homeUrl").trim(); - rule.cateNode = jsonObj.optString("cateNode").trim(); - rule.cateName = jsonObj.optString("cateName").trim(); - rule.cateNameR = getPattern(jsonObj, "cateNameR"); - rule.cateId = jsonObj.optString("cateId").trim(); - rule.cateIdR = getPattern(jsonObj, "cateIdR"); - JSONObject navs = jsonObj.optJSONObject("cateManual"); - if (navs != null) { - Iterator keys = navs.keys(); - while (keys.hasNext()) { - String name = keys.next(); - rule.cateManual.put(name.trim(), navs.getString(name).trim()); - } - } - rule.filter = jsonObj.optJSONObject("filter"); - rule.homeVodNode = jsonObj.optString("homeVodNode").trim(); - rule.homeVodName = jsonObj.optString("homeVodName").trim(); - rule.homeVodNameR = getPattern(jsonObj, "homeVodNameR"); - rule.homeVodId = jsonObj.optString("homeVodId").trim(); - rule.homeVodIdR = getPattern(jsonObj, "homeVodIdR"); - rule.homeVodImg = jsonObj.optString("homeVodImg").trim(); - rule.homeVodImgR = getPattern(jsonObj, "homeVodImgR"); - rule.homeVodMark = jsonObj.optString("homeVodMark").trim(); - rule.homeVodMarkR = getPattern(jsonObj, "homeVodMarkR"); - rule.cateUrl = jsonObj.optString("cateUrl").trim(); - rule.cateVodNode = jsonObj.optString("cateVodNode").trim(); - rule.cateVodName = jsonObj.optString("cateVodName").trim(); - rule.cateVodNameR = getPattern(jsonObj, "cateVodNameR"); - rule.cateVodId = jsonObj.optString("cateVodId").trim(); - rule.cateVodIdR = getPattern(jsonObj, "cateVodIdR"); - rule.cateVodImg = jsonObj.optString("cateVodImg").trim(); - rule.cateVodImgR = getPattern(jsonObj, "cateVodImgR"); - rule.cateVodMark = jsonObj.optString("cateVodMark").trim(); - rule.cateVodMarkR = getPattern(jsonObj, "cateVodMarkR"); - rule.dtUrl = jsonObj.optString("dtUrl"); - rule.dtNode = jsonObj.optString("dtNode"); - rule.dtName = jsonObj.optString("dtName"); - rule.dtNameR = getPattern(jsonObj, "dtNameR"); - rule.dtImg = jsonObj.optString("dtImg"); - rule.dtImgR = getPattern(jsonObj, "dtImgR"); - rule.dtCate = jsonObj.optString("dtCate"); - rule.dtCateR = getPattern(jsonObj, "dtCateR"); - rule.dtYear = jsonObj.optString("dtYear"); - rule.dtYearR = getPattern(jsonObj, "dtYearR"); - rule.dtArea = jsonObj.optString("dtArea"); - rule.dtAreaR = getPattern(jsonObj, "dtAreaR"); - rule.dtMark = jsonObj.optString("dtMark"); - rule.dtMarkR = getPattern(jsonObj, "dtMarkR"); - rule.dtActor = jsonObj.optString("dtActor"); - rule.dtActorR = getPattern(jsonObj, "dtActorR"); - rule.dtDirector = jsonObj.optString("dtDirector"); - rule.dtDirectorR = getPattern(jsonObj, "dtDirectorR"); - rule.dtDesc = jsonObj.optString("dtDesc"); - rule.dtDescR = getPattern(jsonObj, "dtDescR"); - rule.dtFromNode = jsonObj.optString("dtFromNode"); - rule.dtFromName = jsonObj.optString("dtFromName"); - rule.dtFromNameR = getPattern(jsonObj, "dtFromNameR"); - rule.dtUrlNode = jsonObj.optString("dtUrlNode"); - rule.dtUrlSubNode = jsonObj.optString("dtUrlSubNode"); - rule.dtUrlId = jsonObj.optString("dtUrlId"); - rule.dtUrlIdR = getPattern(jsonObj, "dtUrlIdR"); - rule.dtUrlName = jsonObj.optString("dtUrlName"); - rule.dtUrlNameR = getPattern(jsonObj, "dtUrlNameR"); - rule.playUrl = jsonObj.optString("playUrl"); - rule.playUa = jsonObj.optString("playUa"); - rule.playReferer = jsonObj.optString("playReferer"); - rule.searchUrl = jsonObj.optString("searchUrl"); - rule.scVodNode = jsonObj.optString("scVodNode").trim(); - rule.scVodName = jsonObj.optString("scVodName").trim(); - rule.scVodNameR = getPattern(jsonObj, "scVodNameR"); - rule.scVodId = jsonObj.optString("scVodId").trim(); - rule.scVodIdR = getPattern(jsonObj, "scVodIdR"); - rule.scVodImg = jsonObj.optString("scVodImg").trim(); - rule.scVodImgR = getPattern(jsonObj, "scVodImgR"); - rule.scVodMark = jsonObj.optString("scVodMark").trim(); - rule.scVodMarkR = getPattern(jsonObj, "scVodMarkR"); - return rule; - } catch (Exception e) { - SpiderDebug.log(e); - } - return null; - } - - public String getUa() { - return ua; - } - - public String getHomeUrl() { - return homeUrl; - } - - public String getCateNode() { - return cateNode; - } - - public String getCateName() { - return cateName; - } - - public String getCateNameR(String src) { - return doReplaceRegex(cateNameR, src); - } - - public String getCateId() { - return cateId; - } - - public String getCateIdR(String src) { - return doReplaceRegex(cateIdR, src); - } - - public LinkedHashMap getCateManual() { - return cateManual; - } - - public JSONObject getFilter() { - return filter; - } - - public String getHomeVodNode() { - return homeVodNode; - } - - public String getHomeVodName() { - return homeVodName; - } - - public String getHomeVodNameR(String src) { - return doReplaceRegex(homeVodNameR, src); - } - - public String getHomeVodId() { - return homeVodId; - } - - public String getHomeVodIdR(String src) { - return doReplaceRegex(homeVodIdR, src); - } - - public String getHomeVodImg() { - return homeVodImg; - } - - public String getHomeVodImgR(String src) { - return doReplaceRegex(homeVodImgR, src); - } - - public String getHomeVodMark() { - return homeVodMark; - } - - public String getHomeVodMarkR(String src) { - return doReplaceRegex(homeVodMarkR, src); - } - - public String getCateUrl() { - return cateUrl; - } - - public String getCateVodNode() { - return cateVodNode; - } - - public String getCateVodName() { - return cateVodName; - } - - public String getCateVodNameR(String src) { - return doReplaceRegex(cateVodNameR, src); - } - - public String getCateVodId() { - return cateVodId; - } - - public String getCateVodIdR(String src) { - return doReplaceRegex(cateVodIdR, src); - } - - public String getCateVodImg() { - return cateVodImg; - } - - public String getCateVodImgR(String src) { - return doReplaceRegex(cateVodImgR, src); - } - - public String getCateVodMark() { - return cateVodMark; - } - - public String getCateVodMarkR(String src) { - return doReplaceRegex(cateVodNameR, src); - } - - public String getDetailUrl() { - return dtUrl; - } - - public String getDetailNode() { - return dtNode; - } - - public String getDetailName() { - return dtName; - } - - public String getDetailNameR(String src) { - return doReplaceRegex(dtNameR, src); - } - - public String getDetailImg() { - return dtImg; - } - - public String getDetailImgR(String src) { - return doReplaceRegex(dtImgR, src); - } - - public String getDetailCate() { - return dtCate; - } - - public String getDetailCateR(String src) { - return doReplaceRegex(dtCateR, src); - } - - public String getDetailYear() { - return dtYear; - } - - public String getDetailYearR(String src) { - return doReplaceRegex(dtYearR, src); - } - - public String getDetailArea() { - return dtArea; - } - - public String getDetailAreaR(String src) { - return doReplaceRegex(dtAreaR, src); - } - - public String getDetailMark() { - return dtMark; - } - - public String getDetailMarkR(String src) { - return doReplaceRegex(dtMarkR, src); - } - - public String getDetailActor() { - return dtActor; - } - - public String getDetailActorR(String src) { - return doReplaceRegex(dtActorR, src); - } - - public String getDetailDirector() { - return dtDirector; - } - - public String getDetailDirectorR(String src) { - return doReplaceRegex(dtDirectorR, src); - } - - public String getDetailDesc() { - return dtDesc; - } - - public String getDetailDescR(String src) { - return doReplaceRegex(dtDescR, src); - } - - public String getDetailFromNode() { - return dtFromNode; - } - - public String getDetailFromName() { - return dtFromName; - } - - public String getDetailFromNameR(String src) { - return doReplaceRegex(dtFromNameR, src); - } - - public String getDetailUrlNode() { - return dtUrlNode; - } - - public String getDetailUrlSubNode() { - return dtUrlSubNode; - } - - public String getDetailUrlId() { - return dtUrlId; - } - - public String getDetailUrlIdR(String src) { - return doReplaceRegex(dtUrlIdR, src); - } - - public String getDetailUrlName() { - return dtUrlName; - } - - public String getDetailUrlNameR(String src) { - return doReplaceRegex(dtUrlNameR, src); - } - - public String getPlayUrl() { - return playUrl; - } - - public String getPlayUa() { - return playUa; - } - - public String getPlayReferer() { - return playReferer; - } - - public String getSearchUrl() { - return searchUrl; - } - - public String getSearchVodNode() { - return scVodNode; - } - - public String getSearchVodName() { - return scVodName; - } - - public String getSearchVodNameR(String src) { - return doReplaceRegex(scVodNameR, src); - } - - public String getSearchVodId() { - return scVodId; - } - - public String getSearchVodIdR(String src) { - return doReplaceRegex(scVodIdR, src); - } - - public String getSearchVodImg() { - return scVodImg; - } - - public String getSearchVodImgR(String src) { - return doReplaceRegex(scVodImgR, src); - } - - public String getSearchVodMark() { - return scVodMark; - } - - public String getSearchVodMarkR(String src) { - return doReplaceRegex(scVodMarkR, src); - } -} \ No newline at end of file diff --git a/app/src/main/java/com/github/catvod/spider/Hanime.java b/app/src/main/java/com/github/catvod/spider/Hanime.java deleted file mode 100644 index c03a05c5..00000000 --- a/app/src/main/java/com/github/catvod/spider/Hanime.java +++ /dev/null @@ -1,138 +0,0 @@ -package com.github.catvod.spider; - -import com.github.catvod.bean.Class; -import com.github.catvod.bean.Filter; -import com.github.catvod.bean.Result; -import com.github.catvod.bean.Vod; -import com.github.catvod.crawler.Spider; -import com.github.catvod.net.OkHttp; -import com.github.catvod.utils.Util; - -import org.json.JSONObject; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; - -public class Hanime extends Spider { - - private static final String siteUrl = "https://hanime1.me"; - - private HashMap getHeaders() { - HashMap headers = new HashMap<>(); - headers.put("User-Agent", Util.CHROME); - return headers; - } - - private Filter getFilter(String name, String key, List texts) { - List values = new ArrayList<>(); - if (!key.equals("by")) values.add(new Filter.Value("全部", "")); - for (String text : texts) { - if (text.isEmpty()) continue; - values.add(new Filter.Value(text)); - } - return new Filter(key, name, values); - } - - @Override - public String homeContent(boolean filter) throws Exception { - List list = new ArrayList<>(); - List classes = new ArrayList<>(); - LinkedHashMap> filters = new LinkedHashMap<>(); - Document doc1 = Jsoup.parse(OkHttp.string(siteUrl.concat("/search?genre=裏番"), getHeaders())); - List sorts = doc1.select("div.hentai-sort-options-wrapper").eachText(); - List years = doc1.getElementById("year").select("option").eachAttr("value"); - Document doc2 = Jsoup.parse(OkHttp.string(siteUrl, getHeaders())); - for (Element element : doc2.select("a.nav-item")) { - String text = element.text(); - if (text.equals("新番預告") || text.equals("H漫畫")) continue; - classes.add(new Class(text)); - List array = new ArrayList<>(); - array.add(getFilter("排序", "by", sorts)); - array.add(getFilter("年份", "year", years)); - filters.put(text, array); - } - for (Element element : doc2.select("a")) { - if (element.attr("href").contains("watch")) { - String pic = element.select("div > img").attr("src"); - String url = element.attr("href"); - String name = element.select("div > div").text(); - String id = url.split("=")[1]; - if (name.contains("smart_display") || name.isEmpty()) continue; - list.add(new Vod(id, name, pic)); - } - } - return Result.string(classes, list, filters); - } - - @Override - public String categoryContent(String tid, String pg, boolean filter, HashMap extend) throws Exception { - List list = new ArrayList<>(); - if (extend.get("by") == null) extend.put("by", "最新上市"); - if (extend.get("year") == null) extend.put("year", ""); - String target = siteUrl.concat("/search?genre=").concat(tid).concat("&page=").concat(pg).concat("&sort=").concat(extend.get("by")).concat("&year=").concat(extend.get("year")); - Document doc = Jsoup.parse(OkHttp.string(target, getHeaders())); - for (Element element : doc.select("div.col-xs-6")) { - String pic = element.select("img").get(1).attr("src"); - String url = element.select("a.overlay").attr("href"); - String name = element.select("div.card-mobile-title").text(); - String id = url.split("=")[1]; - list.add(new Vod(id, name, pic)); - } - if (list.isEmpty()) { - for (Element element : doc.select("a")) { - if (element.attr("href").contains("watch")) { - String pic = element.select("div > img").attr("src"); - String url = element.attr("href"); - String name = element.select("div > div").text(); - String id = url.split("=")[1]; - if (name.contains("smart_display")) continue; - list.add(new Vod(id, name, pic)); - } - } - } - return Result.string(list); - } - - @Override - public String detailContent(List ids) throws Exception { - Document doc = Jsoup.parse(OkHttp.string(siteUrl.concat("/watch?v=").concat(ids.get(0)), getHeaders())); - String name = doc.getElementById("shareBtn-title").text(); - JSONObject json = new JSONObject(doc.select("script[type=application/ld+json]").html().trim()); - String content = json.optString("description"); - String pic = json.optJSONArray("thumbnailUrl").optString(0); - String url = json.optString("contentUrl");; - Vod vod = new Vod(); - vod.setVodId(ids.get(0)); - vod.setVodPic(pic); - vod.setVodName(name); - vod.setVodContent(content); - vod.setVodPlayFrom("Hanime1"); - vod.setVodPlayUrl("播放$" + url); - return Result.string(vod); - } - - @Override - public String searchContent(String key, boolean quick) throws Exception { - List list = new ArrayList<>(); - String target = siteUrl.concat("/search?query=").concat(key).concat("&genre=&sort=&year=&month=&duration="); - Document doc = Jsoup.parse(OkHttp.string(target, getHeaders())); - for (Element element : doc.select("div.col-xs-6")) { - String pic = element.select("img").get(1).attr("src"); - String url = element.select("a.overlay").attr("href"); - String name = element.select("div.card-mobile-title").text(); - String id = url.split("=")[1]; - list.add(new Vod(id, name, pic)); - } - return Result.string(list); - } - - @Override - public String playerContent(String flag, String id, List vipFlags) throws Exception { - return Result.get().url(id).header(getHeaders()).string(); - } -} diff --git a/app/src/main/java/com/github/catvod/spider/XPath.java b/app/src/main/java/com/github/catvod/spider/XPath.java deleted file mode 100644 index 26eb1b74..00000000 --- a/app/src/main/java/com/github/catvod/spider/XPath.java +++ /dev/null @@ -1,345 +0,0 @@ -package com.github.catvod.spider; - -import android.content.Context; -import android.text.TextUtils; - -import com.github.catvod.bean.Class; -import com.github.catvod.bean.Result; -import com.github.catvod.bean.Vod; -import com.github.catvod.bean.xpath.Rule; -import com.github.catvod.crawler.Spider; -import com.github.catvod.crawler.SpiderDebug; -import com.github.catvod.net.OkHttp; -import com.github.catvod.utils.Util; - -import org.json.JSONArray; -import org.json.JSONObject; -import org.seimicrawler.xpath.JXDocument; -import org.seimicrawler.xpath.JXNode; - -import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Set; - -public class XPath extends Spider { - - private HashMap getHeaders() { - HashMap headers = new HashMap<>(); - headers.put("User-Agent", rule.getUa().isEmpty() ? Util.CHROME : rule.getUa()); - return headers; - } - - @Override - public void init(Context context, String extend) { - this.ext = extend; - } - - @Override - public String homeContent(boolean filter) { - fetchRule(); - List list = new ArrayList<>(); - List classes = new ArrayList<>(); - if (!rule.getCateManual().isEmpty()) { - Set keys = rule.getCateManual().keySet(); - for (String k : keys) { - classes.add(new Class(rule.getCateManual().get(k), k)); - } - } - String webUrl = rule.getHomeUrl(); - JXDocument doc = JXDocument.create(fetch(webUrl)); - if (rule.getCateManual().isEmpty()) { - List navNodes = doc.selN(rule.getCateNode()); - for (int i = 0; i < navNodes.size(); i++) { - String name = navNodes.get(i).selOne(rule.getCateName()).asString().trim(); - name = rule.getCateNameR(name); - String id = navNodes.get(i).selOne(rule.getCateId()).asString().trim(); - id = rule.getCateIdR(id); - classes.add(new Class(id, name)); - } - } - if (!rule.getHomeVodNode().isEmpty()) { - List vodNodes = doc.selN(rule.getHomeVodNode()); - for (int i = 0; i < vodNodes.size(); i++) { - String name = vodNodes.get(i).selOne(rule.getHomeVodName()).asString().trim(); - name = rule.getHomeVodNameR(name); - String id = vodNodes.get(i).selOne(rule.getHomeVodId()).asString().trim(); - id = rule.getHomeVodIdR(id); - String pic = vodNodes.get(i).selOne(rule.getHomeVodImg()).asString().trim(); - pic = rule.getHomeVodImgR(pic); - pic = Util.fixUrl(webUrl, pic); - String mark = ""; - if (!rule.getHomeVodMark().isEmpty()) { - try { - mark = vodNodes.get(i).selOne(rule.getHomeVodMark()).asString().trim(); - mark = rule.getHomeVodMarkR(mark); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - list.add(new Vod(id, name, pic, mark)); - } - } - return Result.string(classes, list, rule.getFilter()); - } - - protected String categoryUrl(String tid, String pg, boolean filter, HashMap extend) { - return rule.getCateUrl().replace("{cateId}", tid).replace("{catePg}", pg); - } - - @Override - public String categoryContent(String tid, String pg, boolean filter, HashMap extend) { - fetchRule(); - List list = new ArrayList<>(); - String webUrl = categoryUrl(tid, pg, filter, extend); - JXDocument doc = JXDocument.create(fetch(webUrl)); - List vodNodes = doc.selN(rule.getCateVodNode()); - for (int i = 0; i < vodNodes.size(); i++) { - String name = vodNodes.get(i).selOne(rule.getCateVodName()).asString().trim(); - name = rule.getCateVodNameR(name); - String id = vodNodes.get(i).selOne(rule.getCateVodId()).asString().trim(); - id = rule.getCateVodIdR(id); - String pic = vodNodes.get(i).selOne(rule.getCateVodImg()).asString().trim(); - pic = rule.getCateVodImgR(pic); - pic = Util.fixUrl(webUrl, pic); - String mark = ""; - if (!rule.getCateVodMark().isEmpty()) { - try { - mark = vodNodes.get(i).selOne(rule.getCateVodMark()).asString().trim(); - mark = rule.getCateVodMarkR(mark); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - list.add(new Vod(id, name, pic, mark)); - } - return Result.string(list); - } - - @Override - public String detailContent(List ids) { - fetchRule(); - String webUrl = rule.getDetailUrl().replace("{vid}", ids.get(0)); - String webContent = fetch(webUrl); - JXDocument doc = JXDocument.create(webContent); - JXNode vodNode = doc.selNOne(rule.getDetailNode()); - String cover = "", title = "", desc = "", category = "", area = "", year = "", remark = "", director = "", actor = ""; - title = vodNode.selOne(rule.getDetailName()).asString().trim(); - title = rule.getDetailNameR(title); - cover = vodNode.selOne(rule.getDetailImg()).asString().trim(); - cover = rule.getDetailImgR(cover); - cover = Util.fixUrl(webUrl, cover); - if (!rule.getDetailCate().isEmpty()) { - try { - category = vodNode.selOne(rule.getDetailCate()).asString().trim(); - category = rule.getDetailCateR(category); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (!rule.getDetailYear().isEmpty()) { - try { - year = vodNode.selOne(rule.getDetailYear()).asString().trim(); - year = rule.getDetailYearR(year); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (!rule.getDetailArea().isEmpty()) { - try { - area = vodNode.selOne(rule.getDetailArea()).asString().trim(); - area = rule.getDetailAreaR(area); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (!rule.getDetailMark().isEmpty()) { - try { - remark = vodNode.selOne(rule.getDetailMark()).asString().trim(); - remark = rule.getDetailMarkR(remark); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (!rule.getDetailActor().isEmpty()) { - try { - actor = vodNode.selOne(rule.getDetailActor()).asString().trim(); - actor = rule.getDetailActorR(actor); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (!rule.getDetailDirector().isEmpty()) { - try { - director = vodNode.selOne(rule.getDetailDirector()).asString().trim(); - director = rule.getDetailDirectorR(director); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (!rule.getDetailDesc().isEmpty()) { - try { - desc = vodNode.selOne(rule.getDetailDesc()).asString().trim(); - desc = rule.getDetailDescR(desc); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - - Vod vod = new Vod(); - vod.setVodId(ids.get(0)); - vod.setVodName(title); - vod.setVodPic(cover); - vod.setTypeName(category); - vod.setVodYear(year); - vod.setVodArea(area); - vod.setVodRemarks(remark); - vod.setVodActor(actor); - vod.setVodDirector(director); - vod.setVodContent(desc); - - ArrayList playFrom = new ArrayList<>(); - List fromNodes = doc.selN(rule.getDetailFromNode()); - for (int i = 0; i < fromNodes.size(); i++) { - String name = fromNodes.get(i).selOne(rule.getDetailFromName()).asString().trim(); - name = rule.getDetailFromNameR(name); - playFrom.add(name); - } - - ArrayList playList = new ArrayList<>(); - List urlListNodes = doc.selN(rule.getDetailUrlNode()); - for (int i = 0; i < urlListNodes.size(); i++) { - List urlNodes = urlListNodes.get(i).sel(rule.getDetailUrlSubNode()); - List vodItems = new ArrayList<>(); - for (int j = 0; j < urlNodes.size(); j++) { - String name = urlNodes.get(j).selOne(rule.getDetailUrlName()).asString().trim(); - name = rule.getDetailUrlNameR(name); - String id = urlNodes.get(j).selOne(rule.getDetailUrlId()).asString().trim(); - id = rule.getDetailUrlIdR(id); - vodItems.add(name + "$" + id); - } - // 排除播放列表為空的播放源 - if (vodItems.isEmpty() && playFrom.size() > i) { - playFrom.set(i, ""); - } - playList.add(TextUtils.join("#", vodItems)); - } - // 排除播放列表為空的播放源 - for (int i = playFrom.size() - 1; i >= 0; i--) { - if (playFrom.get(i).isEmpty()) playFrom.remove(i); - } - for (int i = playList.size() - 1; i >= 0; i--) { - if (playList.get(i).isEmpty()) playList.remove(i); - } - for (int i = playList.size() - 1; i >= 0; i--) { - if (i >= playFrom.size()) playList.remove(i); - } - vod.setVodPlayFrom(TextUtils.join("$$$", playFrom)); - vod.setVodPlayUrl(TextUtils.join("$$$", playList)); - return Result.string(vod); - } - - @Override - public String playerContent(String flag, String id, List vipFlags) { - fetchRule(); - String webUrl = rule.getPlayUrl().isEmpty() ? id : rule.getPlayUrl().replace("{playUrl}", id); - SpiderDebug.log(webUrl); - HashMap headers = new HashMap<>(); - if (!rule.getPlayUa().isEmpty()) headers.put("User-Agent", rule.getPlayUa()); - if (!rule.getPlayReferer().isEmpty()) headers.put("Referer", rule.getPlayReferer()); - return Result.get().parse().url(webUrl).header(headers).string(); - } - - @Override - public String searchContent(String key, boolean quick) throws Exception { - fetchRule(); - if (rule.getSearchUrl().isEmpty()) return ""; - String webUrl = rule.getSearchUrl().replace("{wd}", URLEncoder.encode(key)); - String webContent = fetch(webUrl); - List list = new ArrayList<>(); - if (rule.getSearchVodNode().startsWith("json:")) { - String[] node = rule.getSearchVodNode().substring(5).split(">"); - JSONObject data = new JSONObject(webContent); - for (int i = 0; i < node.length; i++) { - if (i == node.length - 1) { - JSONArray vodArray = data.getJSONArray(node[i]); - for (int j = 0; j < vodArray.length(); j++) { - JSONObject vod = vodArray.getJSONObject(j); - String name = vod.optString(rule.getSearchVodName()).trim(); - name = rule.getSearchVodNameR(name); - String id = vod.optString(rule.getSearchVodId()).trim(); - id = rule.getSearchVodIdR(id); - String pic = vod.optString(rule.getSearchVodImg()).trim(); - pic = rule.getSearchVodImgR(pic); - pic = Util.fixUrl(webUrl, pic); - String mark = vod.optString(rule.getSearchVodMark()).trim(); - mark = rule.getSearchVodMarkR(mark); - list.add(new Vod(id, name, pic, mark)); - } - } else { - data = data.getJSONObject(node[i]); - } - } - } else { - JXDocument doc = JXDocument.create(webContent); - List vodNodes = doc.selN(rule.getSearchVodNode()); - for (int i = 0; i < vodNodes.size(); i++) { - String name = vodNodes.get(i).selOne(rule.getSearchVodName()).asString().trim(); - name = rule.getSearchVodNameR(name); - String id = vodNodes.get(i).selOne(rule.getSearchVodId()).asString().trim(); - id = rule.getSearchVodIdR(id); - String pic = vodNodes.get(i).selOne(rule.getSearchVodImg()).asString().trim(); - pic = rule.getSearchVodImgR(pic); - pic = Util.fixUrl(webUrl, pic); - String mark = ""; - if (!rule.getCateVodMark().isEmpty()) { - try { - mark = vodNodes.get(i).selOne(rule.getSearchVodMark()).asString().trim(); - mark = rule.getSearchVodMarkR(mark); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - list.add(new Vod(id, name, pic, mark)); - } - } - return Result.string(list); - } - - @Override - public boolean manualVideoCheck() { - return false; - } - - @Override - public boolean isVideoFormat(String url) { - return Util.isVideoFormat(url); - } - - protected String ext = null; - protected Rule rule = null; - - protected void fetchRule() { - if (rule == null) { - if (ext != null) { - if (ext.startsWith("http")) { - String json = OkHttp.string(ext, null); - rule = Rule.fromJson(json); - loadRuleExt(json); - } else { - rule = Rule.fromJson(ext); - loadRuleExt(ext); - } - } - } - } - - protected void loadRuleExt(String json) { - } - - protected String fetch(String webUrl) { - SpiderDebug.log(webUrl); - return OkHttp.string(webUrl, getHeaders()); - } -} \ No newline at end of file diff --git a/app/src/main/java/com/github/catvod/spider/XPathFilter.java b/app/src/main/java/com/github/catvod/spider/XPathFilter.java deleted file mode 100644 index 387b99bb..00000000 --- a/app/src/main/java/com/github/catvod/spider/XPathFilter.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.github.catvod.spider; - -import android.text.TextUtils; - -import java.net.URLEncoder; -import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class XPathFilter extends XPath { - - @Override - protected void loadRuleExt(String json) { - super.loadRuleExt(json); - } - - @Override - protected String categoryUrl(String tid, String pg, boolean filter, HashMap extend) { - String cateUrl = rule.getCateUrl(); - if (filter && extend != null && !extend.isEmpty()) { - for (String key : extend.keySet()) { - String value = extend.get(key); - if (!TextUtils.isEmpty(value)) { - cateUrl = cateUrl.replace("{" + key + "}", URLEncoder.encode(value)); - } - } - } - cateUrl = cateUrl.replace("{cateId}", tid).replace("{catePg}", pg); - Matcher m = Pattern.compile("\\{(.*?)\\}").matcher(cateUrl); - while (m.find()) { - String n = m.group(0).replace("{", "").replace("}", ""); - cateUrl = cateUrl.replace(m.group(0), "").replace("/" + n + "/", ""); - } - return cateUrl; - } -} \ No newline at end of file diff --git a/app/src/main/java/com/github/catvod/spider/XPathMac.java b/app/src/main/java/com/github/catvod/spider/XPathMac.java deleted file mode 100644 index 0154b52c..00000000 --- a/app/src/main/java/com/github/catvod/spider/XPathMac.java +++ /dev/null @@ -1,196 +0,0 @@ -package com.github.catvod.spider; - -import android.content.Context; -import android.text.TextUtils; -import android.util.Base64; - -import com.github.catvod.crawler.SpiderDebug; -import com.github.catvod.utils.Util; -import com.google.gson.Gson; - -import org.json.JSONException; -import org.json.JSONObject; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.select.Elements; - -import java.net.URLDecoder; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class XPathMac extends XPath { - - // 嘗試分析直連 - private boolean decodePlayUrl; - // 嘗試匹配官源標識以調用應用配置中的解析列表 - private boolean decodeVipFlag; - // 播放器配置js - private String playerConfigJs = ""; - // 播放器配置js取值正則 - private String playerConfigJsRegex = "[\\W|\\S|.]*?MacPlayerConfig.player_list[\\W|\\S|.]*?=([\\W|\\S|.]*?),MacPlayerConfig.downer_list"; - // 站點里播放源對應的真實官源 - private final HashMap show2VipFlag = new HashMap<>(); - - /** - * mac cms 直連和官源調用應用內播放列表支持 - * - * @param context - * @param extend - */ - public void init(Context context, String extend) { - super.init(context, extend); - } - - @Override - protected void loadRuleExt(String json) { - try { - JSONObject jsonObj = new JSONObject(json); - decodePlayUrl = jsonObj.optBoolean("dcPlayUrl", false); - decodeVipFlag = jsonObj.optBoolean("dcVipFlag", false); - JSONObject dcShow2Vip = jsonObj.optJSONObject("dcShow2Vip"); - if (dcShow2Vip != null) { - Iterator keys = dcShow2Vip.keys(); - while (keys.hasNext()) { - String name = keys.next(); - show2VipFlag.put(name.trim(), dcShow2Vip.getString(name).trim()); - } - } - playerConfigJs = jsonObj.optString("pCfgJs").trim(); - playerConfigJsRegex = jsonObj.optString("pCfgJsR", playerConfigJsRegex).trim(); - } catch (JSONException e) { - SpiderDebug.log(e); - } - } - - @Override - public String homeContent(boolean filter) { - String result = super.homeContent(filter); - if (!result.isEmpty() && !playerConfigJs.isEmpty()) { // 嘗試通過playerConfigJs獲取展示和flag匹配關系 - String webContent = fetch(playerConfigJs); - Matcher matcher = Pattern.compile(playerConfigJsRegex).matcher(webContent); - if (matcher.find()) { - try { - JSONObject jsonObject = new JSONObject(matcher.group(1)); - Iterator keys = jsonObject.keys(); - while (keys.hasNext()) { - String key = keys.next(); - JSONObject keyObj = jsonObject.optJSONObject(key); - if (keyObj == null) continue; - String show = keyObj.optString("show").trim(); - if (show.isEmpty()) continue; - show2VipFlag.put(show, key); - } - } catch (Exception e) { - SpiderDebug.log(e); - } - } - } - return result; - } - - @Override - public String detailContent(List ids) { - String result = super.detailContent(ids); - if (decodeVipFlag && !result.isEmpty()) { - try { - JSONObject jsonObject = new JSONObject(result); - String[] playFrom = jsonObject.optJSONArray("list").getJSONObject(0).optString("vod_play_from").split("\\$\\$\\$"); - if (playFrom.length > 0) { - for (int i = 0; i < playFrom.length; i++) { - if (show2VipFlag.containsKey(playFrom[i])) { - playFrom[i] = show2VipFlag.get(playFrom[i]); - } - } - jsonObject.optJSONArray("list").getJSONObject(0).put("vod_play_from", TextUtils.join("$$$", playFrom)); - result = jsonObject.toString(); - } - } catch (Throwable th) { - SpiderDebug.log(th); - } - } - return result; - } - - @Override - public String playerContent(String flag, String id, List vipFlags) { - fetchRule(); - String webUrl = rule.getPlayUrl().isEmpty() ? id : rule.getPlayUrl().replace("{playUrl}", id); - String videoUrl = null; - // 嘗試分析直連 - if (decodePlayUrl) { - try { - Document doc = Jsoup.parse(fetch(webUrl)); - Elements allScript = doc.select("script"); - for (int i = 0; i < allScript.size(); i++) { - String scContent = allScript.get(i).html().trim(); - if (scContent.startsWith("var player_")) { - int start = scContent.indexOf('{'); - int end = scContent.lastIndexOf('}') + 1; - String json = scContent.substring(start, end); - JSONObject player = new JSONObject(json); - String videoUrlTmp = player.getString("url"); - if (player.has("encrypt")) { - int encrypt = player.getInt("encrypt"); - if (encrypt == 1) { - videoUrlTmp = URLDecoder.decode(videoUrlTmp); - } else if (encrypt == 2) { - videoUrlTmp = new String(Base64.decode(videoUrlTmp, Base64.DEFAULT)); - videoUrlTmp = URLDecoder.decode(videoUrlTmp); - } - } - videoUrl = videoUrlTmp; - break; - } - } - } catch (Exception e) { - SpiderDebug.log(e); - } - } - if (videoUrl != null) { - // 適配2.0.6的調用應用內解析列表的支持, 需要配合直連分析和匹配官源解析一起使用,參考cjt影視和極品直連 - if (decodeVipFlag && Util.isVip(videoUrl)) { // 使用jx:1 - try { - JSONObject result = new JSONObject(); - result.put("parse", 1); - result.put("jx", "1"); - result.put("url", videoUrl); - return result.toString(); - } catch (Exception e) { - SpiderDebug.log(e); - } - } else if (decodeVipFlag && vipFlags.contains(flag)) { // 是否使用應用內解析列表解析官源 - try { - JSONObject result = new JSONObject(); - result.put("parse", 1); - result.put("playUrl", ""); - result.put("url", videoUrl); - result.put("header", ""); - return result.toString(); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - // 如果是視頻直連 直接返回免解 - else if (isVideoFormat(videoUrl)) { - try { - JSONObject result = new JSONObject(); - result.put("parse", 0); - result.put("playUrl", ""); - result.put("url", videoUrl); - HashMap headers = new HashMap<>(); - if (!rule.getPlayUa().isEmpty()) headers.put("User-Agent", rule.getPlayUa()); - if (!rule.getPlayReferer().isEmpty()) headers.put("Referer", rule.getPlayReferer()); - result.put("header", new Gson().toJson(headers)); - return result.toString(); - } catch (Exception e) { - SpiderDebug.log(e); - } - } - } - // 上述都失敗了就按默認模式走 - return super.playerContent(flag, id, vipFlags); - } -} \ No newline at end of file diff --git a/app/src/main/java/com/github/catvod/spider/XPathMacFilter.java b/app/src/main/java/com/github/catvod/spider/XPathMacFilter.java deleted file mode 100644 index 78f9ecc8..00000000 --- a/app/src/main/java/com/github/catvod/spider/XPathMacFilter.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.github.catvod.spider; - -import android.text.TextUtils; - -import java.net.URLEncoder; -import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class XPathMacFilter extends XPathMac { - - @Override - protected String categoryUrl(String tid, String pg, boolean filter, HashMap extend) { - String cateUrl = rule.getCateUrl(); - if (filter && extend != null && !extend.isEmpty()) { - for (String key : extend.keySet()) { - String value = extend.get(key); - if (!TextUtils.isEmpty(value)) { - cateUrl = cateUrl.replace("{" + key + "}", URLEncoder.encode(value)); - } - } - } - cateUrl = cateUrl.replace("{cateId}", tid).replace("{catePg}", pg); - Matcher m = Pattern.compile("\\{(.*?)\\}").matcher(cateUrl); - while (m.find()) { - String n = m.group(0).replace("{", "").replace("}", ""); - cateUrl = cateUrl.replace(m.group(0), "").replace("/" + n + "/", ""); - } - return cateUrl; - } -} \ No newline at end of file diff --git a/app/src/main/java/com/github/catvod/spider/YHDM.java b/app/src/main/java/com/github/catvod/spider/YHDM.java index a633da48..00b5ad3a 100644 --- a/app/src/main/java/com/github/catvod/spider/YHDM.java +++ b/app/src/main/java/com/github/catvod/spider/YHDM.java @@ -53,9 +53,7 @@ public class YHDM extends Spider { List classes = new ArrayList<>(); List typeIds = Arrays.asList("guochandongman", "ribendongman", "dongmandianying", "omeidongman"); List typeNames = Arrays.asList("国产动漫", "日本动漫", "动漫电影", "欧美动漫"); - for (int i = 0; i < typeIds.size(); i++) - classes.add(new Class(typeIds.get(i), typeNames.get(i))); - + for (int i = 0; i < typeIds.size(); i++) classes.add(new Class(typeIds.get(i), typeNames.get(i))); Document doc = Jsoup.parse(OkHttp.string(siteUrl, getHeader())); List list = new ArrayList<>(); for (Element li : doc.select(".stui-vodlist.clearfix .myui-vodlist__box")) {