Clean xpath code
This commit is contained in:
parent
d2aa52b588
commit
57a4523cdb
|
|
@ -3,7 +3,9 @@ package com.github.catvod.spider;
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
import android.text.TextUtils;
|
import android.text.TextUtils;
|
||||||
|
|
||||||
|
import com.github.catvod.bean.Class;
|
||||||
import com.github.catvod.bean.Result;
|
import com.github.catvod.bean.Result;
|
||||||
|
import com.github.catvod.bean.Vod;
|
||||||
import com.github.catvod.bean.xpath.Rule;
|
import com.github.catvod.bean.xpath.Rule;
|
||||||
import com.github.catvod.crawler.Spider;
|
import com.github.catvod.crawler.Spider;
|
||||||
import com.github.catvod.crawler.SpiderDebug;
|
import com.github.catvod.crawler.SpiderDebug;
|
||||||
|
|
@ -23,6 +25,12 @@ import java.util.Set;
|
||||||
|
|
||||||
public class XPath extends Spider {
|
public class XPath extends Spider {
|
||||||
|
|
||||||
|
private HashMap<String, String> getHeaders() {
|
||||||
|
HashMap<String, String> headers = new HashMap<>();
|
||||||
|
headers.put("User-Agent", rule.getUa().isEmpty() ? "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36" : rule.getUa());
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(Context context) {
|
public void init(Context context) {
|
||||||
super.init(context);
|
super.init(context);
|
||||||
|
|
@ -35,20 +43,15 @@ public class XPath extends Spider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String homeContent(boolean filter) {
|
public String homeContent(boolean filter) {
|
||||||
try {
|
|
||||||
fetchRule();
|
fetchRule();
|
||||||
JSONObject result = new JSONObject();
|
List<Vod> list = new ArrayList<>();
|
||||||
JSONArray classes = new JSONArray();
|
List<Class> classes = new ArrayList<>();
|
||||||
if (rule.getCateManual().size() > 0) {
|
if (rule.getCateManual().size() > 0) {
|
||||||
Set<String> keys = rule.getCateManual().keySet();
|
Set<String> keys = rule.getCateManual().keySet();
|
||||||
for (String k : keys) {
|
for (String k : keys) {
|
||||||
JSONObject jsonObject = new JSONObject();
|
classes.add(new Class(rule.getCateManual().get(k), k));
|
||||||
jsonObject.put("type_name", k);
|
|
||||||
jsonObject.put("type_id", rule.getCateManual().get(k));
|
|
||||||
classes.put(jsonObject);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
|
||||||
String webUrl = rule.getHomeUrl();
|
String webUrl = rule.getHomeUrl();
|
||||||
JXDocument doc = JXDocument.create(fetch(webUrl));
|
JXDocument doc = JXDocument.create(fetch(webUrl));
|
||||||
if (rule.getCateManual().size() == 0) {
|
if (rule.getCateManual().size() == 0) {
|
||||||
|
|
@ -58,15 +61,10 @@ public class XPath extends Spider {
|
||||||
name = rule.getCateNameR(name);
|
name = rule.getCateNameR(name);
|
||||||
String id = navNodes.get(i).selOne(rule.getCateId()).asString().trim();
|
String id = navNodes.get(i).selOne(rule.getCateId()).asString().trim();
|
||||||
id = rule.getCateIdR(id);
|
id = rule.getCateIdR(id);
|
||||||
JSONObject jsonObject = new JSONObject();
|
classes.add(new Class(id, name));
|
||||||
jsonObject.put("type_id", id);
|
|
||||||
jsonObject.put("type_name", name);
|
|
||||||
classes.put(jsonObject);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!rule.getHomeVodNode().isEmpty()) {
|
if (!rule.getHomeVodNode().isEmpty()) {
|
||||||
try {
|
|
||||||
JSONArray videos = new JSONArray();
|
|
||||||
List<JXNode> vodNodes = doc.selN(rule.getHomeVodNode());
|
List<JXNode> vodNodes = doc.selN(rule.getHomeVodNode());
|
||||||
for (int i = 0; i < vodNodes.size(); i++) {
|
for (int i = 0; i < vodNodes.size(); i++) {
|
||||||
String name = vodNodes.get(i).selOne(rule.getHomeVodName()).asString().trim();
|
String name = vodNodes.get(i).selOne(rule.getHomeVodName()).asString().trim();
|
||||||
|
|
@ -85,47 +83,10 @@ public class XPath extends Spider {
|
||||||
SpiderDebug.log(e);
|
SpiderDebug.log(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
JSONObject v = new JSONObject();
|
list.add(new Vod(id, name, pic, mark));
|
||||||
v.put("vod_id", id);
|
|
||||||
v.put("vod_name", name);
|
|
||||||
v.put("vod_pic", pic);
|
|
||||||
v.put("vod_remarks", mark);
|
|
||||||
videos.put(v);
|
|
||||||
}
|
|
||||||
result.put("list", videos);
|
|
||||||
} catch (Exception e) {
|
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
return Result.string(classes, list, rule.getFilter());
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
|
||||||
result.put("class", classes);
|
|
||||||
if (filter && rule.getFilter() != null) {
|
|
||||||
result.put("filters", rule.getFilter());
|
|
||||||
}
|
|
||||||
return result.toString();
|
|
||||||
} catch (
|
|
||||||
Exception e) {
|
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
protected HashMap<String, String> getHeaders() {
|
|
||||||
HashMap<String, String> headers = new HashMap<>();
|
|
||||||
headers.put("User-Agent", rule.getUa().isEmpty() ? "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36" : rule.getUa());
|
|
||||||
return headers;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String homeVideoContent() {
|
|
||||||
try {
|
|
||||||
fetchRule();
|
|
||||||
} catch (Exception e) {
|
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) {
|
protected String categoryUrl(String tid, String pg, boolean filter, HashMap<String, String> extend) {
|
||||||
|
|
@ -134,10 +95,9 @@ public class XPath extends Spider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) {
|
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) {
|
||||||
try {
|
|
||||||
fetchRule();
|
fetchRule();
|
||||||
|
List<Vod> list = new ArrayList<>();
|
||||||
String webUrl = categoryUrl(tid, pg, filter, extend);
|
String webUrl = categoryUrl(tid, pg, filter, extend);
|
||||||
JSONArray videos = new JSONArray();
|
|
||||||
JXDocument doc = JXDocument.create(fetch(webUrl));
|
JXDocument doc = JXDocument.create(fetch(webUrl));
|
||||||
List<JXNode> vodNodes = doc.selN(rule.getCateVodNode());
|
List<JXNode> vodNodes = doc.selN(rule.getCateVodNode());
|
||||||
for (int i = 0; i < vodNodes.size(); i++) {
|
for (int i = 0; i < vodNodes.size(); i++) {
|
||||||
|
|
@ -157,44 +117,24 @@ public class XPath extends Spider {
|
||||||
SpiderDebug.log(e);
|
SpiderDebug.log(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
JSONObject v = new JSONObject();
|
list.add(new Vod(id, name, pic, mark));
|
||||||
v.put("vod_id", id);
|
|
||||||
v.put("vod_name", name);
|
|
||||||
v.put("vod_pic", pic);
|
|
||||||
v.put("vod_remarks", mark);
|
|
||||||
videos.put(v);
|
|
||||||
}
|
}
|
||||||
JSONObject result = new JSONObject();
|
return Result.string(list);
|
||||||
result.put("page", pg);
|
|
||||||
result.put("pagecount", Integer.MAX_VALUE);
|
|
||||||
result.put("limit", 90);
|
|
||||||
result.put("total", Integer.MAX_VALUE);
|
|
||||||
result.put("list", videos);
|
|
||||||
return result.toString();
|
|
||||||
} catch (Exception e) {
|
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String detailContent(List<String> ids) {
|
public String detailContent(List<String> ids) {
|
||||||
try {
|
|
||||||
fetchRule();
|
fetchRule();
|
||||||
String webUrl = rule.getDetailUrl().replace("{vid}", ids.get(0));
|
String webUrl = rule.getDetailUrl().replace("{vid}", ids.get(0));
|
||||||
String webContent = fetch(webUrl);
|
String webContent = fetch(webUrl);
|
||||||
JXDocument doc = JXDocument.create(webContent);
|
JXDocument doc = JXDocument.create(webContent);
|
||||||
JXNode vodNode = doc.selNOne(rule.getDetailNode());
|
JXNode vodNode = doc.selNOne(rule.getDetailNode());
|
||||||
|
|
||||||
String cover = "", title = "", desc = "", category = "", area = "", year = "", remark = "", director = "", actor = "";
|
String cover = "", title = "", desc = "", category = "", area = "", year = "", remark = "", director = "", actor = "";
|
||||||
|
|
||||||
title = vodNode.selOne(rule.getDetailName()).asString().trim();
|
title = vodNode.selOne(rule.getDetailName()).asString().trim();
|
||||||
title = rule.getDetailNameR(title);
|
title = rule.getDetailNameR(title);
|
||||||
|
|
||||||
cover = vodNode.selOne(rule.getDetailImg()).asString().trim();
|
cover = vodNode.selOne(rule.getDetailImg()).asString().trim();
|
||||||
cover = rule.getDetailImgR(cover);
|
cover = rule.getDetailImgR(cover);
|
||||||
cover = Utils.fixUrl(webUrl, cover);
|
cover = Utils.fixUrl(webUrl, cover);
|
||||||
|
|
||||||
if (!rule.getDetailCate().isEmpty()) {
|
if (!rule.getDetailCate().isEmpty()) {
|
||||||
try {
|
try {
|
||||||
category = vodNode.selOne(rule.getDetailCate()).asString().trim();
|
category = vodNode.selOne(rule.getDetailCate()).asString().trim();
|
||||||
|
|
@ -252,20 +192,19 @@ public class XPath extends Spider {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONObject vod = new JSONObject();
|
Vod vod = new Vod();
|
||||||
vod.put("vod_id", ids.get(0));
|
vod.setVodId(ids.get(0));
|
||||||
vod.put("vod_name", title);
|
vod.setVodName(title);
|
||||||
vod.put("vod_pic", cover);
|
vod.setVodPic(cover);
|
||||||
vod.put("type_name", category);
|
vod.setTypeName(category);
|
||||||
vod.put("vod_year", year);
|
vod.setVodYear(year);
|
||||||
vod.put("vod_area", area);
|
vod.setVodArea(area);
|
||||||
vod.put("vod_remarks", remark);
|
vod.setVodRemarks(remark);
|
||||||
vod.put("vod_actor", actor);
|
vod.setVodActor(actor);
|
||||||
vod.put("vod_director", director);
|
vod.setVodDirector(director);
|
||||||
vod.put("vod_content", desc);
|
vod.setVodContent(desc);
|
||||||
|
|
||||||
ArrayList<String> playFrom = new ArrayList<>();
|
ArrayList<String> playFrom = new ArrayList<>();
|
||||||
|
|
||||||
List<JXNode> fromNodes = doc.selN(rule.getDetailFromNode());
|
List<JXNode> fromNodes = doc.selN(rule.getDetailFromNode());
|
||||||
for (int i = 0; i < fromNodes.size(); i++) {
|
for (int i = 0; i < fromNodes.size(); i++) {
|
||||||
String name = fromNodes.get(i).selOne(rule.getDetailFromName()).asString().trim();
|
String name = fromNodes.get(i).selOne(rule.getDetailFromName()).asString().trim();
|
||||||
|
|
@ -293,31 +232,17 @@ public class XPath extends Spider {
|
||||||
}
|
}
|
||||||
// 排除播放列表為空的播放源
|
// 排除播放列表為空的播放源
|
||||||
for (int i = playFrom.size() - 1; i >= 0; i--) {
|
for (int i = playFrom.size() - 1; i >= 0; i--) {
|
||||||
if (playFrom.get(i).isEmpty())
|
if (playFrom.get(i).isEmpty()) playFrom.remove(i);
|
||||||
playFrom.remove(i);
|
|
||||||
}
|
}
|
||||||
for (int i = playList.size() - 1; i >= 0; i--) {
|
for (int i = playList.size() - 1; i >= 0; i--) {
|
||||||
if (playList.get(i).isEmpty())
|
if (playList.get(i).isEmpty()) playList.remove(i);
|
||||||
playList.remove(i);
|
|
||||||
}
|
}
|
||||||
for (int i = playList.size() - 1; i >= 0; i--) {
|
for (int i = playList.size() - 1; i >= 0; i--) {
|
||||||
if (i >= playFrom.size())
|
if (i >= playFrom.size()) playList.remove(i);
|
||||||
playList.remove(i);
|
|
||||||
}
|
}
|
||||||
String vod_play_from = TextUtils.join("$$$", playFrom);
|
vod.setVodPlayFrom(TextUtils.join("$$$", playFrom));
|
||||||
String vod_play_url = TextUtils.join("$$$", playList);
|
vod.setVodPlayUrl(TextUtils.join("$$$", playList));
|
||||||
vod.put("vod_play_from", vod_play_from);
|
return Result.string(vod);
|
||||||
vod.put("vod_play_url", vod_play_url);
|
|
||||||
|
|
||||||
JSONObject result = new JSONObject();
|
|
||||||
JSONArray list = new JSONArray();
|
|
||||||
list.put(vod);
|
|
||||||
result.put("list", list);
|
|
||||||
return result.toString();
|
|
||||||
} catch (Exception e) {
|
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -332,16 +257,12 @@ public class XPath extends Spider {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String searchContent(String key, boolean quick) {
|
public String searchContent(String key, boolean quick) throws Exception {
|
||||||
try {
|
|
||||||
fetchRule();
|
fetchRule();
|
||||||
if (rule.getSearchUrl().isEmpty()) {
|
if (rule.getSearchUrl().isEmpty()) return "";
|
||||||
return "";
|
|
||||||
}
|
|
||||||
String webUrl = rule.getSearchUrl().replace("{wd}", URLEncoder.encode(key));
|
String webUrl = rule.getSearchUrl().replace("{wd}", URLEncoder.encode(key));
|
||||||
String webContent = fetch(webUrl);
|
String webContent = fetch(webUrl);
|
||||||
JSONObject result = new JSONObject();
|
List<Vod> list = new ArrayList<>();
|
||||||
JSONArray videos = new JSONArray();
|
|
||||||
if (rule.getSearchVodNode().startsWith("json:")) {
|
if (rule.getSearchVodNode().startsWith("json:")) {
|
||||||
String[] node = rule.getSearchVodNode().substring(5).split(">");
|
String[] node = rule.getSearchVodNode().substring(5).split(">");
|
||||||
JSONObject data = new JSONObject(webContent);
|
JSONObject data = new JSONObject(webContent);
|
||||||
|
|
@ -359,12 +280,7 @@ public class XPath extends Spider {
|
||||||
pic = Utils.fixUrl(webUrl, pic);
|
pic = Utils.fixUrl(webUrl, pic);
|
||||||
String mark = vod.optString(rule.getSearchVodMark()).trim();
|
String mark = vod.optString(rule.getSearchVodMark()).trim();
|
||||||
mark = rule.getSearchVodMarkR(mark);
|
mark = rule.getSearchVodMarkR(mark);
|
||||||
JSONObject v = new JSONObject();
|
list.add(new Vod(id, name, pic, mark));
|
||||||
v.put("vod_id", id);
|
|
||||||
v.put("vod_name", name);
|
|
||||||
v.put("vod_pic", pic);
|
|
||||||
v.put("vod_remarks", mark);
|
|
||||||
videos.put(v);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
data = data.getJSONObject(node[i]);
|
data = data.getJSONObject(node[i]);
|
||||||
|
|
@ -390,21 +306,10 @@ public class XPath extends Spider {
|
||||||
SpiderDebug.log(e);
|
SpiderDebug.log(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
JSONObject v = new JSONObject();
|
list.add(new Vod(id, name, pic, mark));
|
||||||
v.put("vod_id", id);
|
|
||||||
v.put("vod_name", name);
|
|
||||||
v.put("vod_pic", pic);
|
|
||||||
v.put("vod_remarks", mark);
|
|
||||||
videos.put(v);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result.put("list", videos);
|
return Result.string(list);
|
||||||
return result.toString();
|
|
||||||
} catch (
|
|
||||||
Exception e) {
|
|
||||||
SpiderDebug.log(e);
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -412,20 +317,9 @@ public class XPath extends Spider {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String[] videoFormatList = new String[]{".m3u8", ".mp4", ".mpeg", ".flv"};
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isVideoFormat(String url) {
|
public boolean isVideoFormat(String url) {
|
||||||
url = url.toLowerCase();
|
return Utils.isVideoFormat(url);
|
||||||
if (url.contains("=http") || url.contains("=https") || url.contains("=https%3a%2f") || url.contains("=http%3a%2f")) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (String format : videoFormatList) {
|
|
||||||
if (url.contains(format)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String ext = null;
|
protected String ext = null;
|
||||||
|
|
@ -447,7 +341,6 @@ public class XPath extends Spider {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void loadRuleExt(String json) {
|
protected void loadRuleExt(String json) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String fetch(String webUrl) {
|
protected String fetch(String webUrl) {
|
||||||
|
|
|
||||||
|
|
@ -89,7 +89,6 @@ public class XPathMac extends XPath {
|
||||||
SpiderDebug.log(e);
|
SpiderDebug.log(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// SpiderDebug.log(webContent);
|
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ public class Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isVideoFormat(String url) {
|
public static boolean isVideoFormat(String url) {
|
||||||
|
if (url.contains("url=http") || url.contains(".js") || url.contains(".css") || url.contains(".html")) return false;
|
||||||
return Sniffer.RULE.matcher(url).find();
|
return Sniffer.RULE.matcher(url).find();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue