diff --git a/readme.md b/readme.md
index 86be25c..06ef211 100644
--- a/readme.md
+++ b/readme.md
@@ -4,14 +4,14 @@
🗂️A webServer convert web and x5 movie sites to cms api data
-
+
-
-
+
+
diff --git a/utils/HtmlParser.cs b/utils/HtmlParser.cs
index c383d05..6416f11 100644
--- a/utils/HtmlParser.cs
+++ b/utils/HtmlParser.cs
@@ -25,6 +25,13 @@ using NSoup;
using NSoup.Select;
using Document = NSoup.Nodes.Document;
using RestSharp;
+using System.Web;
+using System.Net.Mime;
+using Newtonsoft.Json;
+using NSoup.Helper;
+using System.Text.Encodings.Web;
+using System.Buffers.Text;
+using System.Text.Json.Nodes;
namespace Peach.DataAccess
{
@@ -44,8 +51,8 @@ namespace Peach.DataAccess
UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
};
client = new RestClient(options);
- client.AddDefaultHeader("Content-Type", "application/json");
- client.AddDefaultHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
+ //client.AddDefaultHeader("Content-Type", "application/json");
+ //client.AddDefaultHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
}
///
@@ -54,7 +61,7 @@ namespace Peach.DataAccess
///
///
///
- public object request(string url, JsValue arguments)
+ public object Request(string url, JsValue arguments)
{
Uri uri = new Uri(url);
string Host = uri.Host;
@@ -63,8 +70,55 @@ namespace Peach.DataAccess
var Referer = _headers["Referer"]?.ToString();
var UserAgent = _headers["User-Agent"]?.ToString();
var Cookie = _headers["Cookie"]?.ToString();
+ var ContentType = _headers["Content-Type"]?.ToString();
+
+ var Data = arguments.AsObject()["data"]?.ToString();
+ var Body = arguments.AsObject()["body"]?.ToString();
+
+ var Buffer = arguments.AsObject()["buffer"]?.ToString();
+
+
+
+ String charset = "utf-8";
+ if (ContentType != null && ContentType.Split("charset=").Length > 1)
+ {
+ charset = ContentType.Split("charset=")[1];
+ }
var request = new RestRequest(url);
+
+ if (!string.IsNullOrEmpty(Data) && !Data.Equals("undefined"))
+ {
+ // 序列化JSON数据
+ string post_data = JsonConvert.SerializeObject(Data);
+ // 将JSON参数添加至请求中
+ request.AddParameter("application/json", post_data, ParameterType.RequestBody);
+
+ }
+
+ if (!string.IsNullOrEmpty(Body) && !Body.Equals("undefined"))
+ {
+ String[] queryS = Body.Split("&");
+ foreach (String query in queryS)
+ {
+ //String query = queryS[i];
+ int tmp = query.IndexOf("=");
+ String key;
+ String value;
+ if (tmp != -1)
+ {
+ key = query.Substring(0, tmp);
+ value = query[(tmp + 1)..];
+ }
+ else
+ {
+ key = query;
+ value = "";
+ }
+ request.AddParameter(key, value);
+ }
+ }
+
if (string.IsNullOrEmpty(UserAgent))
UserAgent = "Mozilla/5.0 (Linux; Android 11; M2007J3SC Build/RKQ1.200826.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/77.0.3865.120 MQQBrowser/6.2 TBS/045714 Mobile Safari/537.36";
request.AddHeader("User-Agent", UserAgent);
@@ -73,38 +127,61 @@ namespace Peach.DataAccess
if (!string.IsNullOrEmpty(Cookie) && !Cookie.Equals("undefined"))
{
- string[] cooks = Cookie.Split(';');
- foreach (var item in cooks)
- {
- string[] cook = item.Split('=');
- if (cook.Length == 2)
- client.AddDefaultHeader("Cookie", Cookie);
- //client.AddCookie(cook[0].Trim(), cook[1].Trim(), "/", Host);
- }
+ client.AddDefaultHeader("Cookie", Cookie);
}
string rContent = "";
+ JsObject header = new (_headers.Engine);
+
try
{
+ var client = new RestClient(url);
+
RestResponse? response;
if (method?.ToLower() == "post")
response = client.Post(request);
else
response = client.Get(request);
- var trw = response.Cookies;
- rContent = response.Content;
+
+ //rContent = response.Content;
+ Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+ rContent = HttpUtility.UrlDecode(response.RawBytes == null ? Array.Empty() : response.RawBytes,
+ Encoding.GetEncoding(charset));
+
+ if (response.Headers != null)
+ {
+ foreach (var item in response.Headers)
+ {
+ header.Set(item.Name, item.Value == null ? "" : item.Value.ToString());
+ }
+ }
+
+ if (Buffer == "1")
+ {
+ return new { headers = header, content = response.RawBytes };
+ }
+ else if (Buffer == "2")
+ {
+ return new { headers = header, content = Convert.ToBase64String(Encoding.UTF8.GetBytes(rContent)) };
+ }
+ else
+ {
+ return new { headers = header, content = rContent };
+ }
}
catch (Exception)
{ }
- var jsValue = new { headers = _headers, content = rContent };
- return jsValue;
+ return new { headers = header, content = "" };
}
- private static Regex p = new Regex("url\\((.*?)\\)", RegexOptions.Multiline | RegexOptions.Singleline);
- private static Regex NOAdd_INDEX = new Regex(":eq|:lt|:gt|:first|:last|^body$|^#");
- private static Regex URLJOIN_ATTR = new Regex("(url|src|href|-original|-src|-play|-url)$", RegexOptions.Multiline | RegexOptions.IgnoreCase);
+ private static readonly Regex p = new ("url\\((.*?)\\)", RegexOptions.Multiline | RegexOptions.Singleline);
+ private static readonly Regex NOAdd_INDEX = new (":eq|:lt|:gt|:first|:last|^body$|^#");
+ private static readonly Regex URLJOIN_ATTR = new ("(url|src|href|-original|-src|-play|-url)$", RegexOptions.Multiline | RegexOptions.IgnoreCase);
+ private static String pdfh_html = "";
+ private static String pdfa_html = "";
+ private static Document? pdfh_doc = null;
+ private static Document? pdfa_doc = null;
-
- public string joinUrl(string parent, string child)
+ public static string JoinUrl(string parent, string child)
{
if (string.IsNullOrWhiteSpace(parent))
{
@@ -118,7 +195,7 @@ namespace Peach.DataAccess
url = new Uri(new Uri(parent), child);
q = url.ToString();
}
- catch (Exception e)
+ catch (Exception)
{
//e.printStackTrace();
}
@@ -130,12 +207,12 @@ namespace Peach.DataAccess
public class Painfo
{
- public string nparse_rule;
+ public string? nparse_rule;
public int nparse_index;
- public List excludes;
+ public List? excludes;
}
- private Painfo getParseInfo(string nparse)
+ private static Painfo GetParseInfo(string nparse)
{
/*
根据传入的单规则获取 parse规则,索引位置,排除列表 -- 可以用于剔除元素,支持多个,按标签剔除,按id剔除等操作
@@ -169,7 +246,7 @@ namespace Peach.DataAccess
{
painfo.nparse_index = int.Parse(nparse_pos.Replace("eq(", "").Replace(")", ""));
}
- catch (Exception e1)
+ catch (Exception)
{
painfo.nparse_index = 0;
}
@@ -187,17 +264,23 @@ namespace Peach.DataAccess
return painfo;
}
- //pd
- public string parseDom(string html, string rule)
+ //pdfh
+ public string ParseDomForUrl(string html, string rule)
{
- return parseDomForUrl(html, rule, "");
+ return ParseDom(html, rule, "");
}
- //pdfh
- public string parseDomForUrl(string html, string rule, string Add_url)
+ //pd
+ public string ParseDom(string html, string rule, string Add_url)
{
- if (string.IsNullOrEmpty(html)) return "";
- Document doc = NSoupClient.Parse(html);
+ if (string.IsNullOrWhiteSpace(html)) return "";
+ if (!pdfh_html.Equals(html))
+ {
+ pdfh_html = html;
+ pdfh_doc = NSoupClient.Parse(html);
+ }
+ Document? doc = pdfh_doc;
+ //Document doc = NSoupClient.Parse(html);
if (rule.Equals("body&&Text") || rule.Equals("Text"))
return doc.Text();
else if (rule.Equals("body&&Html") || rule.Equals("Html"))
@@ -213,8 +296,8 @@ namespace Peach.DataAccess
rule = string.Join("&&", excludes);// TextUtils.join("&&", excludes);
}
rule = parseHikerToJq(rule, true);
- string[] parses = rule.Split(" ");
- Elements ret = new Elements();
+ string[]? parses = rule.Split(" ");
+ Elements ret = new ();
foreach (string nparse in parses)
{
ret = parseOneRule(doc, nparse, ret);
@@ -228,7 +311,7 @@ namespace Peach.DataAccess
return ret.Html();
else //(JSUtils.isNotEmpty(option))
{
- string result = ret.Attr(option);
+ string? result = ret.Attr(option);
if (option.ToLower().Contains("style") && result.Contains("url("))
{
Match m = p.Match(result);
@@ -243,9 +326,9 @@ namespace Peach.DataAccess
if (m.Success)
{
if (result.Contains("http"))
- result = result.Substring(result.IndexOf("http"));
+ result = result[result.IndexOf("http")..];
else
- result = joinUrl(Add_url, result);
+ result = JoinUrl(Add_url, result);
}
}
return result;
@@ -253,14 +336,20 @@ namespace Peach.DataAccess
}
//pdfa
- public String[] parseDomForArray(string html, string rule)
+ public String[] ParseDomForArray(string html, string rule)
{
- List eleHtml = new();
- Document doc = NSoupClient.Parse(html);
+ if (!pdfa_html.Equals(html))
+ {
+ pdfa_html = html;
+ pdfa_doc = NSoupClient.Parse(html);
+ }
+ Document? doc = pdfa_doc;
+ List? eleHtml = new();
+ //Document doc = NSoupClient.Parse(html);
rule = parseHikerToJq(rule, false);
- string[] parses = rule.Split(" ");
- Elements ret = new Elements();
+ string[]? parses = rule.Split(" ");
+ Elements ret = new ();
foreach (var pars in parses)
{
ret = parseOneRule(doc, pars, ret);
@@ -274,14 +363,20 @@ namespace Peach.DataAccess
return eleHtml.ToArray();
}
//pdfl
- public String[] parseDomForList(string html, string rule, string list_text, string list_url, string urlKey)
+ public String[] ParseDomForList(string html, string rule, string list_text, string list_url, string urlKey)
{
- Document doc = NSoupClient.Parse(html);
- List new_vod_list = new();
- //String[] new_vod_list = new string[0];
+ if (!pdfa_html.Equals(html))
+ {
+ pdfa_html = html;
+ pdfa_doc = NSoupClient.Parse(html);
+ }
+ Document? doc = pdfa_doc;
+ //Document doc = NSoupClient.Parse(html);
+ List? new_vod_list = new();
+
rule = parseHikerToJq(rule, false);
- string[] parses = rule.Split(" ");
- Elements ret = new Elements();
+ string[]? parses = rule.Split(" ");
+ Elements ret = new ();
foreach (string pars in parses)
{
@@ -291,7 +386,7 @@ namespace Peach.DataAccess
foreach (Element it in ret)
{
- new_vod_list.Add(parseDomForUrl(it.OuterHtml(), list_text, "").Trim() + '$' + parseDomForUrl(it.OuterHtml(), list_url, urlKey));
+ new_vod_list.Add(ParseDom(it.OuterHtml(), list_text, "").Trim() + '$' + ParseDom(it.OuterHtml(), list_url, urlKey));
}
return new_vod_list.ToArray();
@@ -310,13 +405,13 @@ namespace Peach.DataAccess
// 不自动加eq下标索引
if (parse.Contains("&&"))
{
- string[] parses = parse.Split("&&"); //带&&的重新拼接
- List new_parses = new(); //构造新的解析表达式列表
+ string[]? parses = parse.Split("&&"); //带&&的重新拼接
+ List? new_parses = new(); //构造新的解析表达式列表
for (int i = 0; i < parses.Length; i++)
{
- string[] pss = parses[i].Split(" ");
- string ps = pss[pss.Length - 1]; //如果分割&&后带空格就取最后一个元素
- Match m = NOAdd_INDEX.Match(ps); // Matcher m = NOAdd_INDEX.matcher(ps);
+ string[]? pss = parses[i].Split(" ");
+ string? ps = pss[pss.Length - 1]; //如果分割&&后带空格就取最后一个元素
+ Match? m = NOAdd_INDEX.Match(ps); // Matcher m = NOAdd_INDEX.matcher(ps);
//if (!isIndex(ps)) {
if (!m.Success)
{
@@ -338,14 +433,14 @@ namespace Peach.DataAccess
}
else
{
- string[] pss = parse.Split(" ");
- string ps = pss[pss.Length - 1]; //如果分割&&后带空格就取最后一个元素
+ string[]? pss = parse.Split(" ");
+ string? ps = pss[pss.Length - 1]; //如果分割&&后带空格就取最后一个元素
//Matcher m = NOAdd_INDEX.matcher(ps);
- Match m = NOAdd_INDEX.Match(ps);
+ Match? m = NOAdd_INDEX.Match(ps);
//if (!isIndex(ps) && first) {
if (!m.Success && first)
{
- parse = parse + ":eq(0)";
+ parse += ":eq(0)";
}
}
return parse;
@@ -353,7 +448,7 @@ namespace Peach.DataAccess
private Elements parseOneRule(Document doc, string parse, Elements ret)
{
- Painfo info = getParseInfo(parse);
+ Painfo? info = GetParseInfo(parse);
if (ret.IsEmpty)
{
ret = doc.Select(info.nparse_rule);