diff --git a/app/src/main/java/com/github/catvod/parser/JsonBasic.java b/app/src/main/java/com/github/catvod/parser/JsonBasic.java new file mode 100644 index 00000000..940001b7 --- /dev/null +++ b/app/src/main/java/com/github/catvod/parser/JsonBasic.java @@ -0,0 +1,73 @@ +package com.github.catvod.parser; + +import android.util.Base64; + +import com.github.catvod.crawler.SpiderDebug; +import com.github.catvod.utils.Misc; +import com.github.catvod.utils.okhttp.OkHttpUtil; + +import org.json.JSONObject; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Set; + +public class JsonBasic { + public static JSONObject parse(LinkedHashMap jx, String url) { + try { + SpiderDebug.log("Load Json Parse Basic..."); + if (jx.size() > 0) { + Set jxNames = jx.keySet(); + for (String jxName : jxNames) { + String parseUrl = jx.get(jxName); + HashMap reqHeaders = getReqHeader(parseUrl); + try { + String realUrl = reqHeaders.get("url"); + reqHeaders.remove("url"); + SpiderDebug.log(realUrl + url); + String json = OkHttpUtil.string(realUrl + url, reqHeaders); + JSONObject taskResult = Misc.jsonParse(url, json); + if (taskResult == null) + continue; + taskResult.put("jxFrom", jxName); + SpiderDebug.log(taskResult.toString()); + return taskResult; + } catch (Throwable th) { + SpiderDebug.log(th); + } + } + } + } catch (Throwable th) { + SpiderDebug.log(th); + } + return new JSONObject(); + } + + public static HashMap getReqHeader(String url) { + HashMap reqHeaders = new HashMap<>(); + reqHeaders.put("url", url); + if (url.contains("cat_ext")) { + try { + int start = url.indexOf("cat_ext="); + int end = url.indexOf("&", start); + String ext = url.substring(start + 8, end); + ext = new String(Base64.decode(ext, Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP)); + String newUrl = url.substring(0, start) + url.substring(end + 1); + JSONObject jsonObject = new JSONObject(ext); + if (jsonObject.has("header")) { + JSONObject headerJson = jsonObject.optJSONObject("header"); + Iterator keys = headerJson.keys(); + while (keys.hasNext()) { + String key = keys.next(); + reqHeaders.put(key, headerJson.optString(key, "")); + } + } + reqHeaders.put("url", newUrl); + } catch (Throwable th) { + + } + } + return reqHeaders; + } +} diff --git a/app/src/main/java/com/github/catvod/parser/JsonParallel.java b/app/src/main/java/com/github/catvod/parser/JsonParallel.java new file mode 100644 index 00000000..c8efa1ce --- /dev/null +++ b/app/src/main/java/com/github/catvod/parser/JsonParallel.java @@ -0,0 +1,94 @@ +package com.github.catvod.parser; + +import com.github.catvod.crawler.SpiderDebug; +import com.github.catvod.utils.Misc; +import com.github.catvod.utils.okhttp.OkHttpUtil; + +import org.json.JSONObject; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +/** + * 并发解析,直到获得第一个结果 + *

+ * 默认解析超时时间为15秒,如果需要请自定义SpiderReq的HttpClient + *

+ * Author: CatVod + */ +public class JsonParallel { + private static final String ParseOKTag = "p_json_parse"; + + public static JSONObject parse(LinkedHashMap jx, String url) { + try { + if (jx.size() > 0) { + ExecutorService executorService = Executors.newFixedThreadPool(3); + CompletionService completionService = new ExecutorCompletionService(executorService); + List> futures = new ArrayList<>(); + Set jxNames = jx.keySet(); + for (String jxName : jxNames) { + String parseUrl = jx.get(jxName); + futures.add(completionService.submit(new Callable() { + @Override + public JSONObject call() throws Exception { + try { + HashMap reqHeaders = JsonBasic.getReqHeader(parseUrl); + String realUrl = reqHeaders.get("url"); + reqHeaders.remove("url"); + SpiderDebug.log(realUrl + url); + String json = OkHttpUtil.string(realUrl + url, ParseOKTag, reqHeaders); + JSONObject taskResult = Misc.jsonParse(url, json); + taskResult.put("jxFrom", jxName); + SpiderDebug.log(taskResult.toString()); + return taskResult; + } catch (Throwable th) { + SpiderDebug.log(th); + return null; + } + } + })); + } + JSONObject pTaskResult = null; + for (int i = 0; i < futures.size(); ++i) { + Future completed = completionService.take(); + try { + pTaskResult = completed.get(); + if (pTaskResult != null) { + OkHttpUtil.cancel(ParseOKTag); + for (int j = 0; j < futures.size(); j++) { + try { + futures.get(j).cancel(true); + } catch (Throwable th) { + SpiderDebug.log(th); + } + } + futures.clear(); + break; + } + } catch (Throwable th) { + SpiderDebug.log(th); + } + } + try { + executorService.shutdownNow(); + } catch (Throwable th) { + SpiderDebug.log(th); + } + if (pTaskResult != null) + return pTaskResult; + } + } catch (Throwable th) { + SpiderDebug.log(th); + } + return new JSONObject(); + } +} diff --git a/app/src/main/java/com/github/catvod/parser/JsonSequence.java b/app/src/main/java/com/github/catvod/parser/JsonSequence.java new file mode 100644 index 00000000..e850558b --- /dev/null +++ b/app/src/main/java/com/github/catvod/parser/JsonSequence.java @@ -0,0 +1,48 @@ +package com.github.catvod.parser; + +import com.github.catvod.crawler.SpiderDebug; +import com.github.catvod.utils.Misc; +import com.github.catvod.utils.okhttp.OkHttpUtil; + +import org.json.JSONObject; + +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Set; + +/** + * 依次解析,失败尝试下一个 + *

+ * 默认解析超时时间为15秒,如果需要请自定义SpiderReq的HttpClient + *

+ * Author: CatVod + */ +public class JsonSequence { + public static JSONObject parse(LinkedHashMap jx, String url) { + try { + if (jx.size() > 0) { + Set jxNames = jx.keySet(); + for (String jxName : jxNames) { + String parseUrl = jx.get(jxName); + try { + HashMap reqHeaders = JsonBasic.getReqHeader(parseUrl); + String realUrl = reqHeaders.get("url"); + reqHeaders.remove("url"); + SpiderDebug.log(realUrl + url); + String json = OkHttpUtil.string(realUrl + url, reqHeaders); + JSONObject taskResult = Misc.jsonParse(url, json); + if (taskResult == null) + continue; + taskResult.put("jxFrom", jxName); + return taskResult; + } catch (Throwable th) { + SpiderDebug.log(th); + } + } + } + } catch (Throwable th) { + SpiderDebug.log(th); + } + return new JSONObject(); + } +} diff --git a/app/src/main/java/com/github/catvod/parser/MixDemo.java b/app/src/main/java/com/github/catvod/parser/MixDemo.java new file mode 100644 index 00000000..4db3f25e --- /dev/null +++ b/app/src/main/java/com/github/catvod/parser/MixDemo.java @@ -0,0 +1,189 @@ +package com.github.catvod.parser; + +import android.util.Base64; + +import com.github.catvod.crawler.SpiderDebug; + +import org.json.JSONArray; +import org.json.JSONObject; + +import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; + +public class MixDemo { + + /* + "parses": [ + { + "name": "聚合", + "type": 3, + "url": "Demo" + }, + { + "name": "解析", + "type": 1, + "url": "https://192.168.10.88/jx.php?url=", + "ext": { + "flag": [ + "qq", + "iqiyi", + "qiyi", + "爱奇艺", + "腾讯", + "letv", + "sohu", + "tudou", + "pptv", + "mgtv", + "wasu", + "bilibili" + ] + } + }] + */ + + public static HashMap> flagWebJx = new HashMap<>(); + + static HashMap> configs = null; + + public static JSONObject parse(LinkedHashMap> jx, String nameMe, String flag, String url) { + try { + if (configs == null) { + configs = new HashMap<>(); + Iterator keys = jx.keySet().iterator(); + while (keys.hasNext()) { + String key = keys.next(); + HashMap parseBean = jx.get(key); + String type = parseBean.get("type"); + if (type.equals("1") || type.equals("0")) { + try { + JSONArray flags = new JSONObject(parseBean.get("ext")).getJSONArray("flag"); + for (int j = 0; j < flags.length(); j++) { + String flagKey = flags.getString(j); + ArrayList flagJx = configs.get(flagKey); + if (flagJx == null) { + flagJx = new ArrayList<>(); + configs.put(flagKey, flagJx); + } + flagJx.add(key); + } + } catch (Throwable th) { + + } + } + } + } + // 通过上面的配置获得解析列表 + LinkedHashMap jsonJx = new LinkedHashMap<>(); + ArrayList webJx = new ArrayList<>(); + ArrayList flagJx = configs.get(flag); + if (flagJx != null && !flagJx.isEmpty()) { + for (int i = 0; i < flagJx.size(); i++) { + String key = flagJx.get(i); + HashMap parseBean = jx.get(key); + String type = parseBean.get("type"); + if (type.equals("1")) { + jsonJx.put(key, mixUrl(parseBean.get("url"), parseBean.get("ext"))); + } else if (type.equals("0")) { + webJx.add(parseBean.get("url")); + } + } + } else { + Iterator keys = jx.keySet().iterator(); + while (keys.hasNext()) { + String key = keys.next(); + HashMap parseBean = jx.get(key); + String type = parseBean.get("type"); + if (type.equals("1")) { + jsonJx.put(key, mixUrl(parseBean.get("url"), parseBean.get("ext"))); + } else if (type.equals("0")) { + webJx.add(parseBean.get("url")); + } + } + } + if (!webJx.isEmpty()) { + flagWebJx.put(flag, webJx); + } + // 优先使用json并发解析 + JSONObject jsonResult = JsonParallel.parse(jsonJx, url); + if (jsonResult != null && jsonResult.has("url")) { + return jsonResult; + } + // json解析没有得到结果 用webview解析 + if (!webJx.isEmpty()) { + JSONObject webResult = new JSONObject(); + webResult.put("url", "proxy://do=MixDemo&flag=" + flag + "&url=" + Base64.encodeToString(url.getBytes(), Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP)); + webResult.put("parse", 1); + return webResult; + } + } catch (Throwable th) { + SpiderDebug.log(th); + } + return new JSONObject(); + } + + private static String mixUrl(String url, String ext) { + if (ext.trim().length() > 0) { + int idx = url.indexOf("?"); + if (idx > 0) { + return url.substring(0, idx + 1) + "cat_ext=" + Base64.encodeToString(ext.getBytes(), Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP) + "&" + url.substring(idx + 1); + } + } + return url; + } + + public static Object[] loadHtml(String flag, String url) { + try { + url = new String(Base64.decode(url, Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP), "UTF-8"); + String html = "\n" + + "\n" + + "\n" + + "\n" + + "解析\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + ""; + + StringBuilder jxs = new StringBuilder(); + if (flagWebJx.containsKey(flag)) { + ArrayList jxUrls = flagWebJx.get(flag); + for (int i = 0; i < jxUrls.size(); i++) { + jxs.append("\""); + jxs.append(jxUrls.get(i)); + jxs.append("\""); + if (i < jxUrls.size() - 1) { + jxs.append(","); + } + } + } + html = html.replace("#url#", url).replace("#jxs#", jxs.toString()); + Object[] result = new Object[3]; + result[0] = 200; + result[1] = "text/html; charset=\"UTF-8\""; + ByteArrayInputStream baos = new ByteArrayInputStream(html.toString().getBytes("UTF-8")); + result[2] = baos; + return result; + } catch (Throwable th) { + th.printStackTrace(); + } + return null; + } +} diff --git a/app/src/main/java/com/github/catvod/parser/MixWeb.java b/app/src/main/java/com/github/catvod/parser/MixWeb.java new file mode 100644 index 00000000..45d31f93 --- /dev/null +++ b/app/src/main/java/com/github/catvod/parser/MixWeb.java @@ -0,0 +1,178 @@ +package com.github.catvod.parser; + +import android.util.Base64; +import com.github.catvod.crawler.SpiderDebug; +import com.github.catvod.utils.Misc; +import org.json.JSONArray; +import org.json.JSONObject; + +import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; + +public class MixWeb { + + /* + "parses": [ + { + "name": "聚合", + "type": 3, + "url": "Demo" + }, + { + "name": "解析", + "type": 1, + "url": "https://192.168.10.88/jx.php?url=", + "ext": { + "flag": [ + "qq", + "iqiyi", + "qiyi", + "爱奇艺", + "腾讯", + "letv", + "sohu", + "tudou", + "pptv", + "mgtv", + "wasu", + "bilibili" + ] + } + }] + */ + + public static HashMap> flagWebJx = new HashMap<>(); + + static HashMap> configs = null; + + public static JSONObject parse(LinkedHashMap> jx, String nameMe, String flag, String url) { + try { + if (configs == null) { + configs = new HashMap<>(); + Iterator keys = jx.keySet().iterator(); + while (keys.hasNext()) { + String key = keys.next(); + HashMap parseBean = jx.get(key); + String type = parseBean.get("type"); + if (type.equals("0")) { + try { + //取出单个解析的flags列表 + JSONArray flags = new JSONObject(parseBean.get("ext")).getJSONArray("flag"); + for (int j = 0; j < flags.length(); j++) { + //获取单个flags + String flagKey = flags.getString(j); + //取出configs中对应flags的解析 + ArrayList flagJx = configs.get(flagKey); + //如果configs中没有这个flags,则插入一个空的值 + if (flagJx == null) { + flagJx = new ArrayList<>(); + configs.put(flagKey, flagJx); + } + //如果有就在flagJx中增加解析 + flagJx.add(key); + } + } catch (Throwable th) { + + } + } + } + } + // 通过上面的配置获得解析列表 + ArrayList webJx = new ArrayList<>(); + ArrayList flagJx = configs.get(flag); + //如果flagJx不为空 + if (flagJx != null && !flagJx.isEmpty()) { + for (int i = 0; i < flagJx.size(); i++) { + String key = flagJx.get(i); + HashMap parseBean = jx.get(key); + String type = parseBean.get("type"); + if (type.equals("0")) { + //将json解析放入webJx + webJx.add(parseBean.get("url")); + } + } + } else { + Iterator keys = jx.keySet().iterator(); + while (keys.hasNext()) { + String key = keys.next(); + HashMap parseBean = jx.get(key); + String type = parseBean.get("type"); + if (type.equals("0")) { + webJx.add(parseBean.get("url")); + } + } + } + if (!webJx.isEmpty()) { + flagWebJx.put(flag, webJx); + } + + // json解析没有得到结果 用webview解析 + if (!webJx.isEmpty()) { + JSONObject webResult = new JSONObject(); + webResult.put("url", "proxy://do=MixWeb&flag=" + flag + "&url=" + Base64.encodeToString(url.getBytes(), Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP)); + webResult.put("parse", 1); + webResult.put("ua", Misc.UaWinChrome); + return webResult; + } + } catch (Throwable th) { + SpiderDebug.log(th); + } + return new JSONObject(); + } + + + public static Object[] loadHtml(String flag, String url) { + try { + url = new String(Base64.decode(url, Base64.DEFAULT | Base64.URL_SAFE | Base64.NO_WRAP), "UTF-8"); + String html = "\n" + + "\n" + + "\n" + + "\n" + + "解析\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + ""; + + StringBuilder jxs = new StringBuilder(); + if (flagWebJx.containsKey(flag)) { + ArrayList jxUrls = flagWebJx.get(flag); + for (int i = 0; i < jxUrls.size(); i++) { + jxs.append("\""); + jxs.append(jxUrls.get(i)); + jxs.append("\""); + if (i < jxUrls.size() - 1) { + jxs.append(","); + } + } + } + html = html.replace("#url#", url).replace("#jxs#", jxs.toString()); + Object[] result = new Object[3]; + result[0] = 200; + result[1] = "text/html; charset=\"UTF-8\""; + ByteArrayInputStream baos = new ByteArrayInputStream(html.toString().getBytes("UTF-8")); + result[2] = baos; + return result; + } catch (Throwable th) { + th.printStackTrace(); + } + return null; + } +}