Add files via upload

This commit is contained in:
zxyadc 2024-03-23 19:02:40 +08:00 committed by GitHub
parent 0520c7ce20
commit 623e646c1f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 582 additions and 0 deletions

View File

@ -0,0 +1,73 @@
package com.github.catvod.parser;
import android.util.Base64;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.utils.Misc;
import com.github.catvod.utils.okhttp.OkHttpUtil;
import org.json.JSONObject;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Set;
/**
 * Basic JSON parser: tries each configured parse endpoint in iteration order and
 * returns the first result that {@code Misc.jsonParse} accepts.
 */
public class JsonBasic {
    /** Query-parameter prefix carrying Base64-encoded JSON with extra request options. */
    private static final String CAT_EXT_PARAM = "cat_ext=";

    /**
     * Requests each parser in {@code jx} with {@code url} appended until one yields a result.
     *
     * @param jx  parser name to parser URL prefix, tried in map order
     * @param url the media page URL appended to each parser URL
     * @return the first non-null parse result, tagged with {@code "jxFrom"}; an empty
     *         {@link JSONObject} when every parser fails or {@code jx} is null/empty
     */
    public static JSONObject parse(LinkedHashMap<String, String> jx, String url) {
        try {
            SpiderDebug.log("Load Json Parse Basic...");
            if (jx != null && !jx.isEmpty()) {
                for (String jxName : jx.keySet()) {
                    try {
                        // Kept inside the per-parser try so one malformed entry cannot abort the loop.
                        HashMap<String, String> reqHeaders = getReqHeader(jx.get(jxName));
                        String realUrl = reqHeaders.remove("url");
                        if (realUrl == null) {
                            continue;
                        }
                        SpiderDebug.log(realUrl + url);
                        String json = OkHttpUtil.string(realUrl + url, reqHeaders);
                        JSONObject taskResult = Misc.jsonParse(url, json);
                        if (taskResult == null) {
                            continue;
                        }
                        taskResult.put("jxFrom", jxName);
                        SpiderDebug.log(taskResult.toString());
                        return taskResult;
                    } catch (Throwable th) {
                        SpiderDebug.log(th);
                    }
                }
            }
        } catch (Throwable th) {
            SpiderDebug.log(th);
        }
        return new JSONObject();
    }

    /**
     * Splits a parser URL into the URL to request and the headers embedded in its
     * {@code cat_ext} query parameter.
     * <p>
     * The {@code cat_ext} value is URL-safe Base64 of a JSON object; the entries of its
     * optional {@code "header"} object become request headers, and the parameter itself
     * is removed from the returned URL.
     *
     * @param url parser URL, possibly containing {@code cat_ext=...}
     * @return a map holding the headers plus the cleaned URL under the key {@code "url"};
     *         when {@code cat_ext} is absent or cannot be decoded, only the original URL
     */
    public static HashMap<String, String> getReqHeader(String url) {
        HashMap<String, String> reqHeaders = new HashMap<>();
        reqHeaders.put("url", url);
        if (url == null) {
            return reqHeaders;
        }
        int start = url.indexOf(CAT_EXT_PARAM);
        if (start < 0) {
            return reqHeaders;
        }
        try {
            int valueStart = start + CAT_EXT_PARAM.length();
            int end = url.indexOf('&', valueStart);
            // cat_ext may be the last parameter, in which case there is no trailing '&'.
            String encoded = end < 0 ? url.substring(valueStart) : url.substring(valueStart, end);
            String tail = end < 0 ? "" : url.substring(end + 1);
            String ext = new String(Base64.decode(encoded, Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP), "UTF-8");
            JSONObject jsonObject = new JSONObject(ext);
            // optJSONObject returns null when "header" is missing or is not a JSON object.
            JSONObject headerJson = jsonObject.optJSONObject("header");
            if (headerJson != null) {
                Iterator<String> keys = headerJson.keys();
                while (keys.hasNext()) {
                    String key = keys.next();
                    reqHeaders.put(key, headerJson.optString(key, ""));
                }
            }
            // Written last so a header that happens to be named "url" cannot shadow the request URL.
            reqHeaders.put("url", url.substring(0, start) + tail);
        } catch (Throwable th) {
            // Best effort: fall back to the original URL, but leave a trace of why.
            SpiderDebug.log(th);
        }
        return reqHeaders;
    }
}

View File

@ -0,0 +1,94 @@
package com.github.catvod.parser;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.utils.Misc;
import com.github.catvod.utils.okhttp.OkHttpUtil;
import org.json.JSONObject;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
 * Runs the configured parsers concurrently until the first result is obtained.
 * <p>
 * The default parse timeout is 15 seconds; customize SpiderReq's HttpClient if a
 * different timeout is needed.
 * <p>
 * Author: CatVod
 */
public class JsonParallel {
    /** Tag attached to every request so the remaining ones can be cancelled together. */
    private static final String ParseOKTag = "p_json_parse";

    /**
     * Submits one request per parser to a three-thread pool and returns the first
     * non-null parse result.
     *
     * @param jx  parser name to parser URL prefix
     * @param url the media page URL appended to each parser URL
     * @return the first successful result, tagged with {@code "jxFrom"}; an empty
     *         {@link JSONObject} when no parser succeeds, {@code jx} is null/empty,
     *         or the calling thread is interrupted while waiting
     */
    public static JSONObject parse(LinkedHashMap<String, String> jx, final String url) {
        if (jx == null || jx.isEmpty()) {
            return new JSONObject();
        }
        ExecutorService executorService = null;
        try {
            executorService = Executors.newFixedThreadPool(3);
            CompletionService<JSONObject> completionService = new ExecutorCompletionService<JSONObject>(executorService);
            List<Future<JSONObject>> futures = new ArrayList<>();
            for (final String jxName : jx.keySet()) {
                final String parseUrl = jx.get(jxName);
                futures.add(completionService.submit(new Callable<JSONObject>() {
                    @Override
                    public JSONObject call() throws Exception {
                        try {
                            HashMap<String, String> reqHeaders = JsonBasic.getReqHeader(parseUrl);
                            String realUrl = reqHeaders.remove("url");
                            SpiderDebug.log(realUrl + url);
                            String json = OkHttpUtil.string(realUrl + url, ParseOKTag, reqHeaders);
                            JSONObject taskResult = Misc.jsonParse(url, json);
                            // Treat "no result" explicitly instead of relying on a caught NPE.
                            if (taskResult == null) {
                                return null;
                            }
                            taskResult.put("jxFrom", jxName);
                            SpiderDebug.log(taskResult.toString());
                            return taskResult;
                        } catch (Throwable th) {
                            SpiderDebug.log(th);
                            return null;
                        }
                    }
                }));
            }
            int pending = futures.size();
            for (int i = 0; i < pending; i++) {
                // take() blocks until any task finishes; results arrive in completion order.
                Future<JSONObject> completed = completionService.take();
                JSONObject pTaskResult;
                try {
                    pTaskResult = completed.get();
                } catch (Throwable th) {
                    SpiderDebug.log(th);
                    continue;
                }
                if (pTaskResult != null) {
                    // First winner: stop the in-flight requests and the remaining tasks.
                    OkHttpUtil.cancel(ParseOKTag);
                    cancelAll(futures);
                    return pTaskResult;
                }
            }
        } catch (InterruptedException ie) {
            // Restore the flag for the caller and abandon the outstanding requests.
            Thread.currentThread().interrupt();
            SpiderDebug.log(ie);
            try {
                OkHttpUtil.cancel(ParseOKTag);
            } catch (Throwable th) {
                SpiderDebug.log(th);
            }
        } catch (Throwable th) {
            SpiderDebug.log(th);
        } finally {
            // Always release the pool, including when take() is interrupted.
            if (executorService != null) {
                try {
                    executorService.shutdownNow();
                } catch (Throwable th) {
                    SpiderDebug.log(th);
                }
            }
        }
        return new JSONObject();
    }

    /** Cancels every future, logging (not propagating) individual failures. */
    private static void cancelAll(List<Future<JSONObject>> futures) {
        for (Future<JSONObject> future : futures) {
            try {
                future.cancel(true);
            } catch (Throwable th) {
                SpiderDebug.log(th);
            }
        }
        futures.clear();
    }
}

View File

@ -0,0 +1,48 @@
package com.github.catvod.parser;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.utils.Misc;
import com.github.catvod.utils.okhttp.OkHttpUtil;
import org.json.JSONObject;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Set;
/**
 * Tries the configured parsers one after another; when one fails, the next is attempted.
 * <p>
 * The default parse timeout is 15 seconds; customize SpiderReq's HttpClient if a
 * different timeout is needed.
 * <p>
 * Author: CatVod
 */
public class JsonSequence {
    /**
     * Walks {@code jx} in map order and returns the first result a parser produces.
     *
     * @param jx  parser name to parser URL prefix
     * @param url the media page URL appended to each parser URL
     * @return the first non-null parse result, tagged with {@code "jxFrom"}, or an
     *         empty {@link JSONObject} when none succeeds
     */
    public static JSONObject parse(LinkedHashMap<String, String> jx, String url) {
        try {
            if (jx.isEmpty()) {
                return new JSONObject();
            }
            for (String name : jx.keySet()) {
                JSONObject hit = attempt(name, jx.get(name), url);
                if (hit != null) {
                    return hit;
                }
            }
        } catch (Throwable th) {
            SpiderDebug.log(th);
        }
        return new JSONObject();
    }

    /**
     * Runs a single parser; any failure is logged and reported as {@code null} so the
     * caller moves on to the next parser.
     */
    private static JSONObject attempt(String name, String endpoint, String url) {
        try {
            HashMap<String, String> headers = JsonBasic.getReqHeader(endpoint);
            // remove() hands back the stored request URL while dropping it from the header map.
            String target = headers.remove("url") + url;
            SpiderDebug.log(target);
            JSONObject parsed = Misc.jsonParse(url, OkHttpUtil.string(target, headers));
            if (parsed != null) {
                parsed.put("jxFrom", name);
            }
            return parsed;
        } catch (Throwable th) {
            SpiderDebug.log(th);
            return null;
        }
    }
}

View File

@ -0,0 +1,189 @@
package com.github.catvod.parser;
import android.util.Base64;
import com.github.catvod.crawler.SpiderDebug;
import org.json.JSONArray;
import org.json.JSONObject;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
/**
 * Aggregating parser: picks the parsers registered for a play flag, tries the JSON
 * (type 1) ones concurrently, and falls back to a generated HTML page that loads the
 * web (type 0) ones in iframes.
 */
public class MixDemo {
    /*
    Example "parses" configuration (sample values kept verbatim):
    "parses": [
    {
    "name": "聚合",
    "type": 3,
    "url": "Demo"
    },
    {
    "name": "解析",
    "type": 1,
    "url": "https://192.168.10.88/jx.php?url=",
    "ext": {
    "flag": [
    "qq",
    "iqiyi",
    "qiyi",
    "爱奇艺",
    "腾讯",
    "letv",
    "sohu",
    "tudou",
    "pptv",
    "mgtv",
    "wasu",
    "bilibili"
    ]
    }
    }]
    */

    /** Web (type 0) parser URLs most recently selected per flag; read by {@link #loadHtml}. */
    public static HashMap<String, ArrayList<String>> flagWebJx = new HashMap<>();

    /** Flag to parser-name index, built lazily from the first {@code jx} passed to {@link #parse}. */
    static HashMap<String, ArrayList<String>> configs = null;

    /**
     * Resolves {@code url} using the parsers that match {@code flag}.
     *
     * @param jx     parser name to parser bean (keys read here: "type", "url", "ext")
     * @param nameMe not used by this implementation
     * @param flag   play flag used to select parsers; all parsers are used when none match
     * @param url    the media page URL to resolve
     * @return the JSON parse result when it contains "url"; otherwise a proxy descriptor
     *         pointing at {@link #loadHtml} when web parsers exist; otherwise an empty object
     */
    public static JSONObject parse(LinkedHashMap<String, HashMap<String, String>> jx, String nameMe, String flag, String url) {
        try {
            if (configs == null) {
                // Build locally, then publish, so a half-built index is never observed.
                configs = buildFlagIndex(jx);
            }
            // Build the parser lists from the index above.
            LinkedHashMap<String, String> jsonJx = new LinkedHashMap<>();
            ArrayList<String> webJx = new ArrayList<>();
            ArrayList<String> flagJx = configs.get(flag);
            Iterable<String> candidates = (flagJx != null && !flagJx.isEmpty()) ? flagJx : jx.keySet();
            for (String key : candidates) {
                HashMap<String, String> parseBean = jx.get(key);
                if (parseBean == null) {
                    // The cached index may name a parser the current jx no longer contains.
                    continue;
                }
                String parseUrl = parseBean.get("url");
                if (parseUrl == null) {
                    continue;
                }
                String type = parseBean.get("type");
                if ("1".equals(type)) {
                    jsonJx.put(key, mixUrl(parseUrl, parseBean.get("ext")));
                } else if ("0".equals(type)) {
                    webJx.add(parseUrl);
                }
            }
            if (!webJx.isEmpty()) {
                flagWebJx.put(flag, webJx);
            }
            // Prefer concurrent JSON parsing.
            JSONObject jsonResult = JsonParallel.parse(jsonJx, url);
            if (jsonResult != null && jsonResult.has("url")) {
                return jsonResult;
            }
            // JSON parsing produced nothing: fall back to WebView parsing.
            if (!webJx.isEmpty()) {
                JSONObject webResult = new JSONObject();
                webResult.put("url", "proxy://do=MixDemo&flag=" + flag + "&url=" + Base64.encodeToString(url.getBytes(), Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP));
                webResult.put("parse", 1);
                return webResult;
            }
        } catch (Throwable th) {
            SpiderDebug.log(th);
        }
        return new JSONObject();
    }

    /** Indexes type 0/1 parsers by each flag listed in their {@code ext.flag} array. */
    private static HashMap<String, ArrayList<String>> buildFlagIndex(LinkedHashMap<String, HashMap<String, String>> jx) {
        HashMap<String, ArrayList<String>> index = new HashMap<>();
        for (String key : jx.keySet()) {
            HashMap<String, String> parseBean = jx.get(key);
            if (parseBean == null) {
                continue;
            }
            String type = parseBean.get("type");
            if (!"1".equals(type) && !"0".equals(type)) {
                continue;
            }
            String ext = parseBean.get("ext");
            if (ext == null) {
                continue;
            }
            try {
                JSONArray flags = new JSONObject(ext).getJSONArray("flag");
                for (int j = 0; j < flags.length(); j++) {
                    String flagKey = flags.getString(j);
                    ArrayList<String> flagJx = index.get(flagKey);
                    if (flagJx == null) {
                        flagJx = new ArrayList<>();
                        index.put(flagKey, flagJx);
                    }
                    flagJx.add(key);
                }
            } catch (Throwable ignored) {
                // A parser without a usable ext.flag list is simply not indexed by flag.
            }
        }
        return index;
    }

    /**
     * Embeds {@code ext} into {@code url} as a Base64 {@code cat_ext} query parameter,
     * placed directly after the '?'. Returns {@code url} unchanged when {@code ext} is
     * null/blank or the URL has no query part.
     */
    private static String mixUrl(String url, String ext) {
        if (url == null || ext == null || ext.trim().isEmpty()) {
            return url;
        }
        int idx = url.indexOf("?");
        if (idx > 0) {
            return url.substring(0, idx + 1) + "cat_ext=" + Base64.encodeToString(ext.getBytes(), Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP) + "&" + url.substring(idx + 1);
        }
        return url;
    }

    /**
     * Builds the HTML page that opens every web parser stored for {@code flag} in an iframe.
     *
     * @param flag play flag whose web parsers (see {@link #flagWebJx}) are used
     * @param url  URL-safe Base64 of the media page URL
     * @return {@code {200, content type, InputStream of the page}}, or {@code null} on failure
     */
    public static Object[] loadHtml(String flag, String url) {
        try {
            url = new String(Base64.decode(url, Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP), "UTF-8");
            StringBuilder jxs = new StringBuilder();
            ArrayList<String> jxUrls = flagWebJx.get(flag);
            if (jxUrls != null) {
                for (int i = 0; i < jxUrls.size(); i++) {
                    if (i > 0) {
                        jxs.append(",");
                    }
                    // quote() emits an escaped, double-quoted literal that is safe inside the script.
                    jxs.append(JSONObject.quote(jxUrls.get(i)));
                }
            }
            // Values are concatenated (not placeholder-replaced) so their content can never be
            // mistaken for a placeholder, and are escaped so they cannot break out of the literal.
            String html = "\n" +
                    "<!doctype html>\n" +
                    "<html>\n" +
                    "<head>\n" +
                    "<title>解析</title>\n" +
                    "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" +
                    "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=EmulateIE10\" />\n" +
                    "<meta name=\"renderer\" content=\"webkit|ie-comp|ie-stand\">\n" +
                    "<meta name=\"viewport\" content=\"width=device-width\">\n" +
                    "</head>\n" +
                    "<body>\n" +
                    "<script>\n" +
                    "var apiArray=[" + jxs + "];\n" +
                    "var urlPs=" + JSONObject.quote(url) + ";\n" +
                    "var iframeHtml=\"\";\n" +
                    "for(var i=0;i<apiArray.length;i++){\n" +
                    "var URL=apiArray[i]+urlPs;\n" +
                    "iframeHtml=iframeHtml+\"<iframe sandbox='allow-scripts allow-same-origin allow-forms' frameborder='0' allowfullscreen='true' webkitallowfullscreen='true' mozallowfullscreen='true' src=\"+URL+\"></iframe>\";\n" +
                    "}\n" +
                    "document.write(iframeHtml);\n" +
                    "</script>\n" +
                    "</body>\n" +
                    "</html>";
            Object[] result = new Object[3];
            result[0] = 200;
            result[1] = "text/html; charset=\"UTF-8\"";
            result[2] = new ByteArrayInputStream(html.getBytes("UTF-8"));
            return result;
        } catch (Throwable th) {
            SpiderDebug.log(th);
        }
        return null;
    }
}

View File

@ -0,0 +1,178 @@
package com.github.catvod.parser;
import android.util.Base64;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.utils.Misc;
import org.json.JSONArray;
import org.json.JSONObject;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
/**
 * Web-only aggregating parser: picks the web (type 0) parsers registered for a play
 * flag and serves a generated HTML page that loads them in iframes.
 */
public class MixWeb {
    /*
    Example "parses" configuration (sample values kept verbatim):
    "parses": [
    {
    "name": "聚合",
    "type": 3,
    "url": "Demo"
    },
    {
    "name": "解析",
    "type": 1,
    "url": "https://192.168.10.88/jx.php?url=",
    "ext": {
    "flag": [
    "qq",
    "iqiyi",
    "qiyi",
    "爱奇艺",
    "腾讯",
    "letv",
    "sohu",
    "tudou",
    "pptv",
    "mgtv",
    "wasu",
    "bilibili"
    ]
    }
    }]
    */

    /** Web (type 0) parser URLs most recently selected per flag; read by {@link #loadHtml}. */
    public static HashMap<String, ArrayList<String>> flagWebJx = new HashMap<>();

    /** Flag to parser-name index, built lazily from the first {@code jx} passed to {@link #parse}. */
    static HashMap<String, ArrayList<String>> configs = null;

    /**
     * Selects the web parsers for {@code flag} and returns a proxy descriptor for them.
     *
     * @param jx     parser name to parser bean (keys read here: "type", "url", "ext")
     * @param nameMe not used by this implementation
     * @param flag   play flag used to select parsers; all type 0 parsers are used when none match
     * @param url    the media page URL to resolve
     * @return a descriptor with "url" (proxy address for {@link #loadHtml}), "parse" and "ua"
     *         when at least one web parser was selected; otherwise an empty object
     */
    public static JSONObject parse(LinkedHashMap<String, HashMap<String, String>> jx, String nameMe, String flag, String url) {
        try {
            if (configs == null) {
                // Build locally, then publish, so a half-built index is never observed.
                configs = buildFlagIndex(jx);
            }
            // Build the parser list from the index above.
            ArrayList<String> webJx = new ArrayList<>();
            ArrayList<String> flagJx = configs.get(flag);
            Iterable<String> candidates = (flagJx != null && !flagJx.isEmpty()) ? flagJx : jx.keySet();
            for (String key : candidates) {
                HashMap<String, String> parseBean = jx.get(key);
                if (parseBean == null) {
                    // The cached index may name a parser the current jx no longer contains.
                    continue;
                }
                String parseUrl = parseBean.get("url");
                // Only web (type 0) parsers are collected here.
                if (parseUrl != null && "0".equals(parseBean.get("type"))) {
                    webJx.add(parseUrl);
                }
            }
            if (!webJx.isEmpty()) {
                flagWebJx.put(flag, webJx);
                // Hand the work to the WebView through the proxy page.
                JSONObject webResult = new JSONObject();
                webResult.put("url", "proxy://do=MixWeb&flag=" + flag + "&url=" + Base64.encodeToString(url.getBytes(), Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP));
                webResult.put("parse", 1);
                webResult.put("ua", Misc.UaWinChrome);
                return webResult;
            }
        } catch (Throwable th) {
            SpiderDebug.log(th);
        }
        return new JSONObject();
    }

    /** Indexes type 0 parsers by each flag listed in their {@code ext.flag} array. */
    private static HashMap<String, ArrayList<String>> buildFlagIndex(LinkedHashMap<String, HashMap<String, String>> jx) {
        HashMap<String, ArrayList<String>> index = new HashMap<>();
        for (String key : jx.keySet()) {
            HashMap<String, String> parseBean = jx.get(key);
            if (parseBean == null || !"0".equals(parseBean.get("type"))) {
                continue;
            }
            String ext = parseBean.get("ext");
            if (ext == null) {
                continue;
            }
            try {
                // Read this parser's flag list.
                JSONArray flags = new JSONObject(ext).getJSONArray("flag");
                for (int j = 0; j < flags.length(); j++) {
                    String flagKey = flags.getString(j);
                    // Create the bucket for this flag on first use, then register the parser.
                    ArrayList<String> flagJx = index.get(flagKey);
                    if (flagJx == null) {
                        flagJx = new ArrayList<>();
                        index.put(flagKey, flagJx);
                    }
                    flagJx.add(key);
                }
            } catch (Throwable ignored) {
                // A parser without a usable ext.flag list is simply not indexed by flag.
            }
        }
        return index;
    }

    /**
     * Builds the HTML page that opens every web parser stored for {@code flag} in an iframe.
     *
     * @param flag play flag whose web parsers (see {@link #flagWebJx}) are used
     * @param url  URL-safe Base64 of the media page URL
     * @return {@code {200, content type, InputStream of the page}}, or {@code null} on failure
     */
    public static Object[] loadHtml(String flag, String url) {
        try {
            url = new String(Base64.decode(url, Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP), "UTF-8");
            StringBuilder jxs = new StringBuilder();
            ArrayList<String> jxUrls = flagWebJx.get(flag);
            if (jxUrls != null) {
                for (int i = 0; i < jxUrls.size(); i++) {
                    if (i > 0) {
                        jxs.append(",");
                    }
                    // quote() emits an escaped, double-quoted literal that is safe inside the script.
                    jxs.append(JSONObject.quote(jxUrls.get(i)));
                }
            }
            // Values are concatenated (not placeholder-replaced) so their content can never be
            // mistaken for a placeholder, and are escaped so they cannot break out of the literal.
            String html = "\n" +
                    "<!doctype html>\n" +
                    "<html>\n" +
                    "<head>\n" +
                    "<title>解析</title>\n" +
                    "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" +
                    "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=EmulateIE10\" />\n" +
                    "<meta name=\"renderer\" content=\"webkit|ie-comp|ie-stand\">\n" +
                    "<meta name=\"viewport\" content=\"width=device-width\">\n" +
                    "</head>\n" +
                    "<body>\n" +
                    "<script>\n" +
                    "var apiArray=[" + jxs + "];\n" +
                    "var urlPs=" + JSONObject.quote(url) + ";\n" +
                    "var iframeHtml=\"\";\n" +
                    "for(var i=0;i<apiArray.length;i++){\n" +
                    "var URL=apiArray[i]+urlPs;\n" +
                    "iframeHtml=iframeHtml+\"<iframe sandbox='allow-scripts allow-same-origin allow-forms' frameborder='0' allowfullscreen='true' webkitallowfullscreen='true' mozallowfullscreen='true' src=\"+URL+\"></iframe>\";\n" +
                    "}\n" +
                    "document.write(iframeHtml);\n" +
                    "</script>\n" +
                    "</body>\n" +
                    "</html>";
            Object[] result = new Object[3];
            result[0] = 200;
            result[1] = "text/html; charset=\"UTF-8\"";
            result[2] = new ByteArrayInputStream(html.getBytes("UTF-8"));
            return result;
        } catch (Throwable th) {
            SpiderDebug.log(th);
        }
        return null;
    }
}