87,910
社区成员
发帖
与我相关
我的任务
分享
<html>
<head>this is head</head>
<body>
<a>this is a</a>
<span>this is span</span>
<p>this is p <p>
<i>this is i</i>
等等。。。。。
</body>
</html>
// Sample markup whose text fragments are collected below. The second <p> is
// not a closing tag; the string is kept exactly as given.
var str = "<html> <head>this is head</head> <body> <a>this is a</a> <span>this is span</span> <p>this is p <p> <i>this is i</i> 等等。。。。。 </body></html>";

// Split on simple tags: "<", then one or more of "/" or lowercase letters,
// then ">". Tags with attributes or uppercase names are NOT treated as
// separators. The pieces are the text fragments plus the (possibly
// whitespace-only) gaps between adjacent tags.
var arr1 = str.split(/<[/a-z]+>/);

// Keep only the pieces that contain something other than whitespace. The kept
// pieces themselves are left untrimmed. Array#filter is used instead of
// for...in, which walks string keys and would also visit any enumerable
// property added to the array or its prototype.
var arr2 = arr1.filter(function (piece) {
  return piece.trim() !== "";
});
// Matches one element whose content holds no "<":
//   group 1 - the tag name together with its closing ">" (e.g. "head>"),
//   group 2 - the text between the opening and the closing tag.
// The backreference \1 forces the closing tag to repeat group 1, so only
// elements that directly wrap plain text are matched.
var reg = /\<\s*([^\>]+\s*\>)([^\<]*?)\<\/\s*\1/g;
var html = "<html><head>this is head</head><body><a>this is a</a><span>this is span</span><p>this is p </p><i>this is i</i>等等。。。。。</html>";
var arr = [];

// Walk the matches with exec() rather than calling replace() purely for its
// side effects. Every pattern match consumes at least "<", so the loop always
// advances; exec() resets lastIndex to 0 once it returns null.
var match;
while ((match = reg.exec(html)) !== null) {
  console.log(match[0]); // whole matched element
  console.log(match[1]); // tag name plus ">"
  console.log(match[2]); // inner text
  arr.push(match[2]);
}
console.log(arr);
上面的正则只能匹配出被一对标签直接包围的文本;其他没有被标签直接包围的文本(比如你代码中的“等等。。。”),就无法匹配到。
/**
 * Splits markup into the text segments found between runs of tags.
 *
 * The markup is split directly on `tagRun`, so no placeholder string is
 * inserted and text that happens to contain a literal "<>" is not cut apart.
 * Empty strings produced by the split (for example when the markup starts or
 * ends with a tag) are discarded; text before the first tag or after the last
 * tag is kept.
 *
 * @param {string} markup - The markup to split.
 * @param {RegExp} tagRun - Pattern matching one or more consecutive tags.
 * @returns {string[]} The non-empty text segments, in document order.
 */
function splitOnTags(markup, tagRun) {
  return markup.split(tagRun).filter(function (segment) {
    return segment !== "";
  });
}

// One or more consecutive tags, together with the whitespace around them.
var reg = /(?:\s*\<\s*[^\>]+\s*\>\s*)+/g;
var html = "<html><head>this is head</head><body><a>this is a</a><span>this is span</span><p>this is p </p><i>this is i</i>等等。。。。。</html>";

// Treat every run of tags as a single separator and keep the text in between.
var arr = splitOnTags(html, reg);
console.log(arr);
这种正则也会有些问题:如果 HTML 结构混乱的话,可能会出错。
当然,它可以把所有的文本都匹配出来。