Clean xpath code

This commit is contained in:
FongMi 2023-02-15 23:49:00 +08:00
parent d2aa52b588
commit 57a4523cdb
3 changed files with 222 additions and 329 deletions

View File

@ -3,7 +3,9 @@ package com.github.catvod.spider;
import android.content.Context; import android.content.Context;
import android.text.TextUtils; import android.text.TextUtils;
import com.github.catvod.bean.Class;
import com.github.catvod.bean.Result; import com.github.catvod.bean.Result;
import com.github.catvod.bean.Vod;
import com.github.catvod.bean.xpath.Rule; import com.github.catvod.bean.xpath.Rule;
import com.github.catvod.crawler.Spider; import com.github.catvod.crawler.Spider;
import com.github.catvod.crawler.SpiderDebug; import com.github.catvod.crawler.SpiderDebug;
@ -23,6 +25,12 @@ import java.util.Set;
public class XPath extends Spider { public class XPath extends Spider {
private HashMap<String, String> getHeaders() {
HashMap<String, String> headers = new HashMap<>();
headers.put("User-Agent", rule.getUa().isEmpty() ? "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36" : rule.getUa());
return headers;
}
@Override @Override
public void init(Context context) { public void init(Context context) {
super.init(context); super.init(context);
@ -35,20 +43,15 @@ public class XPath extends Spider {
@Override @Override
public String homeContent(boolean filter) { public String homeContent(boolean filter) {
try {
fetchRule(); fetchRule();
JSONObject result = new JSONObject(); List<Vod> list = new ArrayList<>();
JSONArray classes = new JSONArray(); List<Class> classes = new ArrayList<>();
if (rule.getCateManual().size() > 0) { if (rule.getCateManual().size() > 0) {
Set<String> keys = rule.getCateManual().keySet(); Set<String> keys = rule.getCateManual().keySet();
for (String k : keys) { for (String k : keys) {
JSONObject jsonObject = new JSONObject(); classes.add(new Class(rule.getCateManual().get(k), k));
jsonObject.put("type_name", k);
jsonObject.put("type_id", rule.getCateManual().get(k));
classes.put(jsonObject);
} }
} }
try {
String webUrl = rule.getHomeUrl(); String webUrl = rule.getHomeUrl();
JXDocument doc = JXDocument.create(fetch(webUrl)); JXDocument doc = JXDocument.create(fetch(webUrl));
if (rule.getCateManual().size() == 0) { if (rule.getCateManual().size() == 0) {
@ -58,15 +61,10 @@ public class XPath extends Spider {
name = rule.getCateNameR(name); name = rule.getCateNameR(name);
String id = navNodes.get(i).selOne(rule.getCateId()).asString().trim(); String id = navNodes.get(i).selOne(rule.getCateId()).asString().trim();
id = rule.getCateIdR(id); id = rule.getCateIdR(id);
JSONObject jsonObject = new JSONObject(); classes.add(new Class(id, name));
jsonObject.put("type_id", id);
jsonObject.put("type_name", name);
classes.put(jsonObject);
} }
} }
if (!rule.getHomeVodNode().isEmpty()) { if (!rule.getHomeVodNode().isEmpty()) {
try {
JSONArray videos = new JSONArray();
List<JXNode> vodNodes = doc.selN(rule.getHomeVodNode()); List<JXNode> vodNodes = doc.selN(rule.getHomeVodNode());
for (int i = 0; i < vodNodes.size(); i++) { for (int i = 0; i < vodNodes.size(); i++) {
String name = vodNodes.get(i).selOne(rule.getHomeVodName()).asString().trim(); String name = vodNodes.get(i).selOne(rule.getHomeVodName()).asString().trim();
@ -85,47 +83,10 @@ public class XPath extends Spider {
SpiderDebug.log(e); SpiderDebug.log(e);
} }
} }
JSONObject v = new JSONObject(); list.add(new Vod(id, name, pic, mark));
v.put("vod_id", id);
v.put("vod_name", name);
v.put("vod_pic", pic);
v.put("vod_remarks", mark);
videos.put(v);
}
result.put("list", videos);
} catch (Exception e) {
SpiderDebug.log(e);
} }
} }
} catch (Exception e) { return Result.string(classes, list, rule.getFilter());
SpiderDebug.log(e);
}
result.put("class", classes);
if (filter && rule.getFilter() != null) {
result.put("filters", rule.getFilter());
}
return result.toString();
} catch (
Exception e) {
SpiderDebug.log(e);
}
return "";
}
protected HashMap<String, String> getHeaders() {
HashMap<String, String> headers = new HashMap<>();
headers.put("User-Agent", rule.getUa().isEmpty() ? "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36" : rule.getUa());
return headers;
}
@Override
public String homeVideoContent() {
try {
fetchRule();
} catch (Exception e) {
SpiderDebug.log(e);
}
return "";
} }
protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) { protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) {
@ -134,10 +95,9 @@ public class XPath extends Spider {
@Override @Override
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) { public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) {
try {
fetchRule(); fetchRule();
List<Vod> list = new ArrayList<>();
String webUrl = categoryUrl(tid, pg, filter, extend); String webUrl = categoryUrl(tid, pg, filter, extend);
JSONArray videos = new JSONArray();
JXDocument doc = JXDocument.create(fetch(webUrl)); JXDocument doc = JXDocument.create(fetch(webUrl));
List<JXNode> vodNodes = doc.selN(rule.getCateVodNode()); List<JXNode> vodNodes = doc.selN(rule.getCateVodNode());
for (int i = 0; i < vodNodes.size(); i++) { for (int i = 0; i < vodNodes.size(); i++) {
@ -157,44 +117,24 @@ public class XPath extends Spider {
SpiderDebug.log(e); SpiderDebug.log(e);
} }
} }
JSONObject v = new JSONObject(); list.add(new Vod(id, name, pic, mark));
v.put("vod_id", id);
v.put("vod_name", name);
v.put("vod_pic", pic);
v.put("vod_remarks", mark);
videos.put(v);
} }
JSONObject result = new JSONObject(); return Result.string(list);
result.put("page", pg);
result.put("pagecount", Integer.MAX_VALUE);
result.put("limit", 90);
result.put("total", Integer.MAX_VALUE);
result.put("list", videos);
return result.toString();
} catch (Exception e) {
SpiderDebug.log(e);
}
return "";
} }
@Override @Override
public String detailContent(List<String> ids) { public String detailContent(List<String> ids) {
try {
fetchRule(); fetchRule();
String webUrl = rule.getDetailUrl().replace("{vid}", ids.get(0)); String webUrl = rule.getDetailUrl().replace("{vid}", ids.get(0));
String webContent = fetch(webUrl); String webContent = fetch(webUrl);
JXDocument doc = JXDocument.create(webContent); JXDocument doc = JXDocument.create(webContent);
JXNode vodNode = doc.selNOne(rule.getDetailNode()); JXNode vodNode = doc.selNOne(rule.getDetailNode());
String cover = "", title = "", desc = "", category = "", area = "", year = "", remark = "", director = "", actor = ""; String cover = "", title = "", desc = "", category = "", area = "", year = "", remark = "", director = "", actor = "";
title = vodNode.selOne(rule.getDetailName()).asString().trim(); title = vodNode.selOne(rule.getDetailName()).asString().trim();
title = rule.getDetailNameR(title); title = rule.getDetailNameR(title);
cover = vodNode.selOne(rule.getDetailImg()).asString().trim(); cover = vodNode.selOne(rule.getDetailImg()).asString().trim();
cover = rule.getDetailImgR(cover); cover = rule.getDetailImgR(cover);
cover = Utils.fixUrl(webUrl, cover); cover = Utils.fixUrl(webUrl, cover);
if (!rule.getDetailCate().isEmpty()) { if (!rule.getDetailCate().isEmpty()) {
try { try {
category = vodNode.selOne(rule.getDetailCate()).asString().trim(); category = vodNode.selOne(rule.getDetailCate()).asString().trim();
@ -252,20 +192,19 @@ public class XPath extends Spider {
} }
} }
JSONObject vod = new JSONObject(); Vod vod = new Vod();
vod.put("vod_id", ids.get(0)); vod.setVodId(ids.get(0));
vod.put("vod_name", title); vod.setVodName(title);
vod.put("vod_pic", cover); vod.setVodPic(cover);
vod.put("type_name", category); vod.setTypeName(category);
vod.put("vod_year", year); vod.setVodYear(year);
vod.put("vod_area", area); vod.setVodArea(area);
vod.put("vod_remarks", remark); vod.setVodRemarks(remark);
vod.put("vod_actor", actor); vod.setVodActor(actor);
vod.put("vod_director", director); vod.setVodDirector(director);
vod.put("vod_content", desc); vod.setVodContent(desc);
ArrayList<String> playFrom = new ArrayList<>(); ArrayList<String> playFrom = new ArrayList<>();
List<JXNode> fromNodes = doc.selN(rule.getDetailFromNode()); List<JXNode> fromNodes = doc.selN(rule.getDetailFromNode());
for (int i = 0; i < fromNodes.size(); i++) { for (int i = 0; i < fromNodes.size(); i++) {
String name = fromNodes.get(i).selOne(rule.getDetailFromName()).asString().trim(); String name = fromNodes.get(i).selOne(rule.getDetailFromName()).asString().trim();
@ -293,31 +232,17 @@ public class XPath extends Spider {
} }
// 排除播放列表為空的播放源 // 排除播放列表為空的播放源
for (int i = playFrom.size() - 1; i >= 0; i--) { for (int i = playFrom.size() - 1; i >= 0; i--) {
if (playFrom.get(i).isEmpty()) if (playFrom.get(i).isEmpty()) playFrom.remove(i);
playFrom.remove(i);
} }
for (int i = playList.size() - 1; i >= 0; i--) { for (int i = playList.size() - 1; i >= 0; i--) {
if (playList.get(i).isEmpty()) if (playList.get(i).isEmpty()) playList.remove(i);
playList.remove(i);
} }
for (int i = playList.size() - 1; i >= 0; i--) { for (int i = playList.size() - 1; i >= 0; i--) {
if (i >= playFrom.size()) if (i >= playFrom.size()) playList.remove(i);
playList.remove(i);
} }
String vod_play_from = TextUtils.join("$$$", playFrom); vod.setVodPlayFrom(TextUtils.join("$$$", playFrom));
String vod_play_url = TextUtils.join("$$$", playList); vod.setVodPlayUrl(TextUtils.join("$$$", playList));
vod.put("vod_play_from", vod_play_from); return Result.string(vod);
vod.put("vod_play_url", vod_play_url);
JSONObject result = new JSONObject();
JSONArray list = new JSONArray();
list.put(vod);
result.put("list", list);
return result.toString();
} catch (Exception e) {
SpiderDebug.log(e);
}
return "";
} }
@Override @Override
@ -332,16 +257,12 @@ public class XPath extends Spider {
} }
@Override @Override
public String searchContent(String key, boolean quick) { public String searchContent(String key, boolean quick) throws Exception {
try {
fetchRule(); fetchRule();
if (rule.getSearchUrl().isEmpty()) { if (rule.getSearchUrl().isEmpty()) return "";
return "";
}
String webUrl = rule.getSearchUrl().replace("{wd}", URLEncoder.encode(key)); String webUrl = rule.getSearchUrl().replace("{wd}", URLEncoder.encode(key));
String webContent = fetch(webUrl); String webContent = fetch(webUrl);
JSONObject result = new JSONObject(); List<Vod> list = new ArrayList<>();
JSONArray videos = new JSONArray();
if (rule.getSearchVodNode().startsWith("json:")) { if (rule.getSearchVodNode().startsWith("json:")) {
String[] node = rule.getSearchVodNode().substring(5).split(">"); String[] node = rule.getSearchVodNode().substring(5).split(">");
JSONObject data = new JSONObject(webContent); JSONObject data = new JSONObject(webContent);
@ -359,12 +280,7 @@ public class XPath extends Spider {
pic = Utils.fixUrl(webUrl, pic); pic = Utils.fixUrl(webUrl, pic);
String mark = vod.optString(rule.getSearchVodMark()).trim(); String mark = vod.optString(rule.getSearchVodMark()).trim();
mark = rule.getSearchVodMarkR(mark); mark = rule.getSearchVodMarkR(mark);
JSONObject v = new JSONObject(); list.add(new Vod(id, name, pic, mark));
v.put("vod_id", id);
v.put("vod_name", name);
v.put("vod_pic", pic);
v.put("vod_remarks", mark);
videos.put(v);
} }
} else { } else {
data = data.getJSONObject(node[i]); data = data.getJSONObject(node[i]);
@ -390,21 +306,10 @@ public class XPath extends Spider {
SpiderDebug.log(e); SpiderDebug.log(e);
} }
} }
JSONObject v = new JSONObject(); list.add(new Vod(id, name, pic, mark));
v.put("vod_id", id);
v.put("vod_name", name);
v.put("vod_pic", pic);
v.put("vod_remarks", mark);
videos.put(v);
} }
} }
result.put("list", videos); return Result.string(list);
return result.toString();
} catch (
Exception e) {
SpiderDebug.log(e);
}
return "";
} }
@Override @Override
@ -412,20 +317,9 @@ public class XPath extends Spider {
return false; return false;
} }
private final String[] videoFormatList = new String[]{".m3u8", ".mp4", ".mpeg", ".flv"};
@Override @Override
public boolean isVideoFormat(String url) { public boolean isVideoFormat(String url) {
url = url.toLowerCase(); return Utils.isVideoFormat(url);
if (url.contains("=http") || url.contains("=https") || url.contains("=https%3a%2f") || url.contains("=http%3a%2f")) {
return false;
}
for (String format : videoFormatList) {
if (url.contains(format)) {
return true;
}
}
return false;
} }
protected String ext = null; protected String ext = null;
@ -447,7 +341,6 @@ public class XPath extends Spider {
} }
protected void loadRuleExt(String json) { protected void loadRuleExt(String json) {
} }
protected String fetch(String webUrl) { protected String fetch(String webUrl) {

View File

@ -89,7 +89,6 @@ public class XPathMac extends XPath {
SpiderDebug.log(e); SpiderDebug.log(e);
} }
} }
// SpiderDebug.log(webContent);
} }
return result; return result;
} }

View File

@ -30,6 +30,7 @@ public class Utils {
} }
public static boolean isVideoFormat(String url) { public static boolean isVideoFormat(String url) {
if (url.contains("url=http") || url.contains(".js") || url.contains(".css") || url.contains(".html")) return false;
return Sniffer.RULE.matcher(url).find(); return Sniffer.RULE.matcher(url).find();
} }