Fix drpy: support magnet links (skip urlJoin for special schemes), style attribute handling, and related issues
commit aff38eb7fe (parent dbc8e560a3)
libs/drpy.js (14 changed lines)
@@ -55,7 +55,7 @@ function pre(){
 }
 
 let rule = {};
-const VERSION = 'drpy1 3.9.47beta1 20230711';
+const VERSION = 'drpy1 3.9.47beta15 20230728';
 /** Known-issue log
  * 1. 影魔's jinja2 engine does not support rendering the {{fl}} object directly (fix this if possible: support direct object rendering with string escaping, and skip escaping when |safe is appended) [credit to 影魔: the latest file shows this has already been fixed]
  * Array.prototype.append = Array.prototype.push; this kind of JS patch is buggy: a for...in loop over an array will also enumerate the added property (a major issue that needs to be ruled out carefully)
@@ -111,6 +111,8 @@ var _pdfa;
 var _pd;
 // const DOM_CHECK_ATTR = ['url', 'src', 'href', 'data-original', 'data-src'];
 const DOM_CHECK_ATTR = /(url|src|href|-original|-src|-play|-url|style)$/;
+// filter special links; they must not go through urlJoin
+const SPECIAL_URL = /^(ftp|magnet|thunder|ws):/;
 const SELECT_REGEX = /:eq|:lt|:gt|#/g;
 const SELECT_REGEX_A = /:eq|:lt|:gt/g;
 
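
The point of the new gate: attribute values that look like URLs normally get trimmed to their http part or joined onto the base URL, which would corrupt magnet, thunder, ftp and ws links. A rough sketch of the logic (fixUrl and this urljoin are stand-ins invented for the example, not drpy's real API):

const DOM_CHECK_ATTR = /(url|src|href|-original|-src|-play|-url|style)$/;
const SPECIAL_URL = /^(ftp|magnet|thunder|ws):/;

// stand-in for drpy's own url joining; the real implementation differs
function urljoin(base, rel) {
    return base.replace(/\/+$/, '') + '/' + rel.replace(/^\.?\//, '');
}

function fixUrl(attr, value, baseUrl) {
    if (DOM_CHECK_ATTR.test(attr) && !SPECIAL_URL.test(value)) {
        if (/http/.test(value)) {
            return value.substr(value.indexOf('http'));
        }
        return urljoin(baseUrl, value);
    }
    return value; // ftp:, magnet:, thunder: and ws: links pass through untouched
}

fixUrl('href', 'magnet:?xt=urn:btih:abc123', 'https://example.com/');
// -> 'magnet:?xt=urn:btih:abc123' (without the gate this would enter the urljoin branch)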
@@ -537,7 +539,7 @@ const defaultParser = {
 if(typeof(uri)==='undefined'||!uri){
 uri = '';
 }
-if(DOM_CHECK_ATTR.test(parse)){
+if(DOM_CHECK_ATTR.test(parse) && !SPECIAL_URL.test(ret)){
 if(/http/.test(ret)){
 ret = ret.substr(ret.indexOf('http'));
 }else{
@@ -569,6 +571,8 @@ function pdfh2(html,parse){
 if(/style/.test(option.toLowerCase())&&/url\(/.test(result)){
 try {
 result = result.match(/url\((.*?)\)/)[1];
+// added 2023/07/28: automatically strip leading/trailing single/double quotes from links extracted from style
+result = result.replace(/^['|"](.*)['|"]$/, "$1");
 }catch (e) {}
 }
 return result
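
On an illustrative style value, the extraction plus the new replace works like this:

const style = "background-image: url('/upload/vod/cover.jpg')";
let result = style.match(/url\((.*?)\)/)[1];       // "'/upload/vod/cover.jpg'" (quotes kept by the capture)
result = result.replace(/^['|"](.*)['|"]$/, "$1"); // "/upload/vod/cover.jpg"

Note that the character class ['|"] also matches a literal pipe; the Python port below uses the tighter re.sub(r"^['\"]|['\"]$", '', ret) form, which matches quotes only.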
@@ -604,7 +608,7 @@ function pd2(html,parse,uri){
 if(typeof(uri)==='undefined'||!uri){
 uri = '';
 }
-if(DOM_CHECK_ATTR.test(parse)){
+if(DOM_CHECK_ATTR.test(parse) && !SPECIAL_URL.test(ret)){
 if(/http/.test(ret)){
 ret = ret.substr(ret.indexOf('http'));
 }else{
@@ -727,10 +731,12 @@ const parseTags = {
 if(/style/.test(option.toLowerCase())&&/url\(/.test(result)){
 try {
 result = result.match(/url\((.*?)\)/)[1];
+// added 2023/07/28: automatically strip leading/trailing single/double quotes from links extracted from style
+result = result.replace(/^['|"](.*)['|"]$/, "$1");
 }catch (e) {}
 }
 }
-if (result && base_url && DOM_CHECK_ATTR.test(option)) {
+if (result && base_url && DOM_CHECK_ATTR.test(option) && !SPECIAL_URL.test(result)) {
 if (/http/.test(result)) {
 result = result.substr(result.indexOf('http'));
 } else {
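
Evaluated against a sample special-scheme link, the extended condition in parseTags behaves as follows (values are illustrative):

const option = 'href';
const base_url = 'https://example.com/';
const result = 'thunder://QUFodHRwOi8vZXhhbXBsZQ==';
DOM_CHECK_ATTR.test(option); // true: href would normally trigger url handling
SPECIAL_URL.test(result);    // true: thunder: is a special scheme
// the combined condition is false, so the urljoin branch is skipped
// and the thunder link is returned unchanged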
libs/drpy.min.js (vendored, 2 changed lines): diff suppressed because one or more lines are too long

libs/drpy2.js
@@ -41,7 +41,7 @@ function pre(){
 
 let rule = {};
 let vercode = typeof(pdfl) ==='function'?'drpy2.1':'drpy2';
-const VERSION = vercode+' 3.9.47beta1 20230711';
+const VERSION = vercode+' 3.9.47beta15 20230728';
 /** Known-issue log
  * 1. 影魔's jinja2 engine does not support rendering the {{fl}} object directly (fix this if possible: support direct object rendering with string escaping, and skip escaping when |safe is appended) [credit to 影魔: the latest file shows this has already been fixed]
  * Array.prototype.append = Array.prototype.push; this kind of JS patch is buggy: a for...in loop over an array will also enumerate the added property (a major issue that needs to be ruled out carefully)
@@ -97,7 +97,9 @@ var _pdfh;
 var _pdfa;
 var _pd;
 // const DOM_CHECK_ATTR = ['url', 'src', 'href', 'data-original', 'data-src'];
-const DOM_CHECK_ATTR = /(url|src|href|-original|-src|-play|-url)$/;
+const DOM_CHECK_ATTR = /(url|src|href|-original|-src|-play|-url|style)$/;
+// filter special links; they must not go through urlJoin
+const SPECIAL_URL = /^(ftp|magnet|thunder|ws):/;
 const NOADD_INDEX = /:eq|:lt|:gt|:first|:last|^body$|^#/; // do not auto-append an :eq index
 const URLJOIN_ATTR = /(url|src|href|-original|-src|-play|-url|style)$/; // attributes that require automatic urljoin
 const SELECT_REGEX = /:eq|:lt|:gt|#/g;
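
In drpy2 the DOM_CHECK_ATTR change is itself part of the fix; before this commit, style was not treated as a URL-carrying attribute at all:

const OLD_DOM_CHECK_ATTR = /(url|src|href|-original|-src|-play|-url)$/;
const NEW_DOM_CHECK_ATTR = /(url|src|href|-original|-src|-play|-url|style)$/;
OLD_DOM_CHECK_ATTR.test('style'); // false: links inside style skipped url handling entirely
NEW_DOM_CHECK_ATTR.test('style'); // true: relative links from style now get trimmed/joined too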
@@ -544,6 +546,8 @@ function pdfh2(html,parse){
 if(/style/.test(option.toLowerCase())&&/url\(/.test(result)){
 try {
 result = result.match(/url\((.*?)\)/)[1];
+// added 2023/07/28: automatically strip leading/trailing single/double quotes from links extracted from style
+result = result.replace(/^['|"](.*)['|"]$/, "$1");
 }catch (e) {}
 }
 return result
@@ -579,7 +583,7 @@ function pd2(html,parse,uri){
 if(typeof(uri)==='undefined'||!uri){
 uri = '';
 }
-if(DOM_CHECK_ATTR.test(parse)){
+if(DOM_CHECK_ATTR.test(parse) && !SPECIAL_URL.test(ret)){
 if(/http/.test(ret)){
 ret = ret.substr(ret.indexOf('http'));
 }else{
libs/drpy2.min.js (vendored, 2 changed lines): diff suppressed because one or more lines are too long
@@ -176,6 +176,7 @@ namespace Peach.DataAccess
 private static readonly Regex p = new ("url\\((.*?)\\)", RegexOptions.Multiline | RegexOptions.Singleline);
 private static readonly Regex NOAdd_INDEX = new (":eq|:lt|:gt|:first|:last|^body$|^#");
 private static readonly Regex URLJOIN_ATTR = new ("(url|src|href|-original|-src|-play|-url|style)$", RegexOptions.Multiline | RegexOptions.IgnoreCase);
+private static readonly Regex SPECIAL_URL = new ("^(ftp|magnet|thunder|ws):", RegexOptions.Multiline | RegexOptions.IgnoreCase);
 private static String pdfh_html = "";
 private static String pdfa_html = "";
 private static Document? pdfh_doc = null;
@@ -317,13 +318,15 @@ namespace Peach.DataAccess
 Match m = p.Match(result);
 if (m.Success)
 result = m.Groups[1]?.Value;
+result = Regex.Replace(result, "^['|\"](.*)['|\"]$", "$1");
 }
 if (!string.IsNullOrWhiteSpace(result) && !string.IsNullOrWhiteSpace(Add_url))// (JSUtils.isNotEmpty(result) && JSUtils.isNotEmpty(Add_url))
 {
 // attributes that require automatic urljoin
 Match m = URLJOIN_ATTR.Match(option);
+Match n = SPECIAL_URL.Match(result);
 //if (isUrl(option)) {
-if (m.Success)
+if (m.Success && !n.Success)
 {
 if (result.Contains("http"))
 result = result[result.IndexOf("http")..];
@@ -23,6 +23,7 @@ public class HtmlParser {
 private static final Pattern p = Pattern.compile("url\\((.*?)\\)", Pattern.MULTILINE | Pattern.DOTALL);
 private static final Pattern NOADD_INDEX = Pattern.compile(":eq|:lt|:gt|:first|:last|^body$|^#"); // do not auto-append an :eq index
 private static final Pattern URLJOIN_ATTR = Pattern.compile("(url|src|href|-original|-src|-play|-url|style)$", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); // attributes that require automatic urljoin
+private static final Pattern SPECIAL_URL = Pattern.compile("^(ftp|magnet|thunder|ws):", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); // filter special links; skip urlJoin
 private static Document pdfh_doc = null;
 private static Document pdfa_doc = null;
 
@@ -199,12 +200,15 @@ public class HtmlParser {
 if (m.find()) {
 result = m.group(1);
 }
+// added 2023/07/28: automatically strip leading/trailing single/double quotes from links extracted from style
+result = result.replaceAll("^['|\"](.*)['|\"]$", "$1");
 }
 if (JSUtils.isNotEmpty(result) && JSUtils.isNotEmpty(add_url)) {
 // attributes that require automatic urljoin
 Matcher m = URLJOIN_ATTR.matcher(option);
+Matcher n = SPECIAL_URL.matcher(result);
 //if (isUrl(option)) {
-if (m.find()) {
+if (m.find() && !n.find()) {
 if (result.contains("http")) {
 result = result.substring(result.indexOf("http"));
 } else {
@@ -14,6 +14,7 @@ from jsonpath import jsonpath
 PARSE_CACHE = True  # parse cache
 NOADD_INDEX = ':eq|:lt|:gt|:first|:last|^body$|^#'  # do not auto-append an :eq index
 URLJOIN_ATTR = '(url|src|href|-original|-src|-play|-url|style)$'  # attributes that require automatic urljoin
+SPECIAL_URL = '^(ftp|magnet|thunder|ws):'  # filter special links; skip urlJoin
 
 
 class jsoup:
@@ -193,11 +194,13 @@ class jsoup:
 if self.contains(option.lower(), 'style') and self.contains(ret, 'url('):
     try:
         ret = re.search('url\((.*?)\)', ret, re.M | re.S).groups()[0]
+        # added 2023/07/28: automatically strip leading/trailing single/double quotes from links extracted from style
+        ret = re.sub(r"^['\"]|['\"]$", '', ret)
     except:
         pass
 if ret and base_url:
     # need_add = re.search(URLJOIN_ATTR, option, re.M | re.I)
-    need_add = self.test(URLJOIN_ATTR, option)
+    need_add = self.test(URLJOIN_ATTR, option) and not self.test(SPECIAL_URL, ret)
     if need_add:
         if 'http' in ret:
             ret = ret[ret.find('http'):]