21,886
社区成员
发帖
与我相关
我的任务
分享
<?php
/**
 * Truncate an HTML fragment to its first $length text bytes, keeping tags whole.
 *
 * Tags are copied to the result without counting towards $length. Opening tags
 * that are still unclosed at the cut point are removed from the result, so the
 * returned fragment never ends inside an element. Closing tags that do not
 * match the innermost open tag are copied through unchanged. Copying stops as
 * soon as $length text bytes have been taken, so a closing tag that directly
 * follows the last kept byte is not included.
 *
 * Lengths are counted in bytes, not in multibyte characters.
 *
 * Side effect: echoes a push/pop trace ("入栈"/"出栈") for every tracked tag and
 * a final banner line, exactly once per call.
 *
 * @param string $str    HTML fragment to truncate.
 * @param int    $length Number of text bytes to keep.
 * @return string Truncated fragment with dangling opening tags removed.
 */
function mySubstr( $str, $length ){
    $str = (string) $str;
    $maxlen = strlen( $str );
    // Elements that never take a closing tag; they must not be treated as "unclosed".
    $voidtags = array( 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr' );
    $resultstr = '';
    // Open tags awaiting their closing tag: array( name, offset in $resultstr, tag length ).
    $tagstack = array();
    $charcnt = 0;
    $i = 0;
    while( $i < $maxlen && $charcnt < $length ){
        $close = false;
        if( $str[$i] === '<' ){
            $close = strpos( $str, '>', $i );
            $nextopen = strpos( $str, '<', $i + 1 );
            // "2<3<em>": another '<' before the '>' means this '<' is plain text.
            if( $close !== false && $nextopen !== false && $nextopen < $close ){
                $close = false;
            }
        }
        if( $close === false ){
            // Ordinary text byte, or a '<' that does not start a tag.
            $resultstr .= $str[$i];
            $charcnt++;
            $i++;
            continue;
        }
        $tag = substr( $str, $i, $close - $i + 1 );
        //Opening tags are pushed; the matching closing tag pops them again
        if( preg_match( '/^<([a-z][a-z0-9]*)(?:\s[^>]*)?>$/i', $tag, $r )
            && substr( $tag, -2 ) !== '/>'
            && !in_array( strtolower( $r[1] ), $voidtags, true ) ){
            echo '入栈:',htmlspecialchars($r[1]),'<br />';
            // Remember where the tag sits so it can be cut out by position later.
            $tagstack[] = array( $r[1], strlen( $resultstr ), strlen( $tag ) );
        }
        elseif( !empty( $tagstack ) && preg_match( '/^<\/\s*([a-z][a-z0-9]*)\s*>$/i', $tag, $r ) ){
            $top = $tagstack[count( $tagstack ) - 1];
            if( strcasecmp( $top[0], $r[1] ) === 0 ){
                echo '出栈:',htmlspecialchars($top[0]),'<br />';
                array_pop( $tagstack );
            }
        }
        $resultstr .= $tag;
        $i = $close + 1;
    }
    echo '<hr size=1>最后结果为:';
    //Drop opening tags that never got a closing tag. Working from the innermost
    //(latest) tag outwards keeps the recorded offsets of earlier tags valid.
    while( !empty( $tagstack ) ){
        $open = array_pop( $tagstack );
        $resultstr = substr_replace( $resultstr, '', $open[1], $open[2] );
    }
    return $resultstr;
}
// Benchmark driver: record start time and memory, run the tag-aware truncation
// on a nested sample, then print the escaped result and the measured deltas.
$sttime = microtime(true);
$stmem = memory_get_usage();
$str = "a1<body>b2<p>c3<em>d4</em>e5</p>f6</body>g7h8";

// The heading is printed before mySubstr() runs, so the trace it echoes
// appears between the heading and the escaped result.
echo '处理结果为:<br/><hr size=1>';
echo htmlspecialchars( mySubstr( $str, 18 ) );
echo '<br />';

echo "内存使用情况:";
echo ( memory_get_usage() - $stmem );
echo '<br />';

echo "算法运行时间(microtime):";
echo ( microtime(true) - $sttime );
echo '<br/>';

// Second sample: flat <em> markup, cut after 13 visible characters.
$str = "123<em>abc</em>456<em>def</em>789";
echo htmlspecialchars( mysubstr( $str, 13 ) );
/**
 * Keep the first $len text bytes of $str, ignoring <em> and </em> markers when counting.
 *
 * Only the literal markers "<em>" and "</em>" are recognised as tags; every
 * other byte counts as text. The result ends directly after the last counted
 * byte. If that leaves an "<em>" without its "</em>", that opening marker is
 * removed so the returned string has balanced emphasis markup.
 *
 * Lengths are counted in bytes, not in multibyte characters.
 *
 * @param string $str Text that may contain <em>...</em> runs.
 * @param int    $len Number of text bytes to keep (default 0 yields '').
 * @return string Truncated text.
 */
function mysubstr($str,$len=0){
    $str = (string) $str;
    $total = strlen($str);
    $pos = 0;        // byte offset into $str
    $visible = 0;    // text bytes consumed so far
    $open_at = -1;   // offset of the <em> that is currently unclosed, or -1

    // Walk the original string so the cut offset is exact even when the same
    // character occurs several times or inside a marker.
    while($pos < $total && $visible < $len){
        if(substr($str,$pos,4) === '<em>'){
            $open_at = $pos;
            $pos += 4;
        }elseif(substr($str,$pos,5) === '</em>'){
            $open_at = -1;
            $pos += 5;
        }else{
            $visible++;
            $pos++;
        }
    }

    $cut = substr($str,0,$pos);
    if($open_at < 0){
        return $cut;
    }
    // Splice out the dangling "<em>" and keep the text that followed it.
    return substr($cut,0,$open_at).substr($cut,$open_at + 4);
}
/**
 * Cut $str after $len text bytes, keeping <em>/</em> markers out of the count.
 *
 * The string is split into text runs and "<em>" / "</em>" markers. Text is
 * copied until $len bytes have been taken; markers met on the way are copied
 * unchanged. Copying stops as soon as the budget is used up, so a "</em>"
 * that directly follows the last kept byte is not included; the "<em>" it
 * would have closed is then removed instead, keeping the markup balanced.
 *
 * No placeholder characters are substituted, so text containing '|' or '@'
 * is handled like any other text. Lengths are counted in bytes.
 *
 * @param string $str Text that may contain <em>...</em> runs.
 * @param int    $len Number of text bytes to keep.
 * @return string Truncated text.
 */
function cutString($str,$len){
    $pieces = preg_split('/(<\/?em>)/',(string) $str,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    if($pieces === false){
        $pieces = array((string) $str);
    }
    $remaining = (int) $len;
    $kept = array();
    $open_index = null;   // index in $kept of the <em> still waiting for </em>

    foreach($pieces as $piece){
        if($remaining <= 0){
            break;
        }
        if($piece === '<em>'){
            $open_index = count($kept);
            $kept[] = $piece;
        }elseif($piece === '</em>'){
            $open_index = null;
            $kept[] = $piece;
        }else{
            $taken = substr($piece,0,$remaining);
            $remaining -= strlen($taken);
            $kept[] = $taken;
        }
    }

    // The cut fell inside an emphasised run: drop its opening marker.
    if($open_index !== null){
        unset($kept[$open_index]);
    }
    return implode('',$kept);
}
// Demo: pass a sample string containing three <em>...</em> runs to
// cutString() with a length of 5 and print whatever it returns.
$str = '123<em>abc</em>456<em>lsaf12</em>sdklfskdf<em>00000</em>';
echo cutString($str,5);
/**
 * Truncate $the_str to its first $len text bytes; tags are kept but not counted.
 *
 * A '<' starts a tag when a '>' follows it with no other '<' in between;
 * otherwise it is ordinary text. The result ends directly after the last
 * counted byte. If the most recent tag before the cut is an opening tag
 * (neither "</...>" nor ".../>"), that tag is removed so the result does not
 * end inside an unclosed element.
 *
 * Intended for flat markup such as <em>...</em>: only the most recent opening
 * tag is tracked, not a full nesting stack. Lengths are counted in bytes.
 *
 * @param string $the_str Text that may contain tags.
 * @param int    $len     Number of text bytes to keep.
 * @return string Truncated text (always a string).
 */
function sub_str($the_str,$len){
    $the_str = (string) $the_str;
    $total = strlen($the_str);
    $j = 0;          // byte offset into $the_str
    $i = 0;          // text bytes counted so far
    $open_at = -1;   // offset of the opening tag that is still unclosed, or -1
    $open_len = 0;   // byte length of that tag

    /** Map the requested text length onto an offset in the original string **/
    while($i < $len && $j < $total){
        $tag_end = false;
        if($the_str[$j] === '<'){
            $tag_end = strpos($the_str,'>',$j);
            $next_lt = strpos($the_str,'<',$j + 1);
            // Another '<' before the '>' means this '<' is literal text.
            if($tag_end !== false && $next_lt !== false && $next_lt < $tag_end){
                $tag_end = false;
            }
        }
        if($tag_end === false){
            $j++;
            $i++;
            continue;
        }
        if($the_str[$j + 1] === '/'){
            // Closing tag: whatever was open counts as closed.
            $open_at = -1;
        }else if($the_str[$tag_end - 1] !== '/'){
            // Opening tag (self-closing ".../>" tags leave the state alone).
            $open_at = $j;
            $open_len = $tag_end - $j + 1;
        }
        $j = $tag_end + 1;
    }
    /** Mapping done **/

    $get_thestr = substr($the_str,0,$j);
    if($open_at < 0){
        return $get_thestr;
    }
    // Cut out the dangling opening tag, keeping the text on both sides.
    return substr($get_thestr,0,$open_at).substr($get_thestr,$open_at + $open_len);
}
/**
 * Truncate $the_str to its first $len text bytes; tags are kept but not counted.
 *
 * A tag is "<", any run of bytes other than '<' and '>', then ">". A '<' that
 * does not fit that shape (as in "2<3<em>3") is counted as ordinary text.
 * The result ends directly after the last counted byte. If the most recent tag
 * before the cut is an opening tag (neither "</...>" nor ".../>"), that tag is
 * removed so the result does not end inside an unclosed element.
 *
 * Intended for flat markup such as <em>...</em>: only the most recent opening
 * tag is tracked, not a full nesting stack. Lengths are counted in bytes.
 *
 * @param string $the_str Text that may contain tags.
 * @param int    $len     Number of text bytes to keep.
 * @return string Truncated text (always a string).
 */
function sub_str($the_str,$len){
    $the_str = (string) $the_str;
    $total = strlen($the_str);
    $j = 0;            // byte offset into $the_str
    $counted = 0;      // text bytes taken so far
    $pending = null;   // array(offset, length) of the unclosed opening tag

    /** Map the requested text length onto an offset in the original string **/
    while($counted < $len && $j < $total){
        // \G anchors the match at $j, so only a tag starting right here qualifies.
        if($the_str[$j] === '<' && preg_match('/\G<[^<>]*>/',$the_str,$found,0,$j) === 1){
            $tag = $found[0];
            if(strncmp($tag,'</',2) === 0){
                $pending = null;
            }else if(substr($tag,-2) !== '/>'){
                $pending = array($j,strlen($tag));
            }
            $j += strlen($tag);
            continue;
        }
        // Plain text, including a literal '<' such as the one in "2<3".
        $j++;
        $counted++;
    }
    /** Mapping done **/

    $get_thestr = substr($the_str,0,$j);
    if($pending === null){
        return $get_thestr;
    }
    // Remove the opening tag whose closing tag lies beyond the cut.
    $head_thestr = substr($get_thestr,0,$pending[0]);
    $last_thestr = substr($get_thestr,$pending[0] + $pending[1]);
    return $head_thestr.$last_thestr;
}
<?php
// Demo: the sample contains a self-closing <br/> and a stray extra </em>;
// pass it to sub_str() with a length of 10 and print whatever it returns.
$str = '123<em>abc<br/></em>45</em>6<em>def</em>789';
echo sub_str($str,10);
/**
 * Truncate $str to its first $length text bytes and tidy the surrounding tags.
 *
 * Tags are copied without being counted. Open tags are tracked on a stack, so
 * nested markup is handled:
 *  - a closing tag with no open tag at all is dropped;
 *  - a closing tag matching the innermost open tag is kept;
 *  - a closing tag that does not match is dropped together with the innermost
 *    open tag;
 *  - self-closing tags ("<br/>", "<br />") and void elements ("<br>") are kept
 *    and never tracked;
 *  - opening tags still unclosed at the cut point are removed.
 *
 * A '<' or '>' that is not part of a well-formed tag counts as ordinary text.
 * Copying stops as soon as $length text bytes are taken, so a closing tag
 * directly after the last kept byte is not included. Lengths are in bytes.
 *
 * @param string $str    HTML fragment to truncate.
 * @param int    $length Number of text bytes to keep.
 * @return string Truncated fragment.
 */
function sub_str($str,$length)
{
    $str = (string) $str;
    $len = strlen($str);
    // Elements that never take a closing tag.
    $void = array('area'=>1,'base'=>1,'br'=>1,'col'=>1,'embed'=>1,'hr'=>1,'img'=>1,
        'input'=>1,'link'=>1,'meta'=>1,'param'=>1,'source'=>1,'track'=>1,'wbr'=>1);
    $pieces = array();   // output pieces keyed by insertion order; rejected ones are unset
    $next_key = 0;
    $open = array();     // stack of array(key in $pieces, tag name) for unclosed opening tags
    $data_len = 0;
    $i = 0;

    while($i < $len && $data_len < $length){
        // \G anchors at $i; [^<>]* rejects a '<' that is followed by another '<'.
        if($str[$i] === '<' && preg_match('/\G<[^<>]*>/',$str,$m,0,$i) === 1){
            $tag = $m[0];
            $i += strlen($tag);

            if(preg_match('/^<\/\s*([\w:-]+)\s*>$/',$tag,$r)){ //closing tag
                if(empty($open)){ //nothing is open, so this closer is an orphan: drop it
                    continue;
                }
                $innermost = array_pop($open);
                if(strcasecmp($innermost[1],$r[1]) === 0){
                    $pieces[$next_key++] = $tag; //properly closed pair: keep both
                }else{
                    unset($pieces[$innermost[0]]); //mismatch: drop this closer and the open tag it collided with
                }
                continue;
            }

            $pieces[$next_key] = $tag;
            if(preg_match('/^<([\w:-]+)(?:\s[^>]*)?>$/',$tag,$r)
                && substr($tag,-2) !== '/>'
                && !isset($void[strtolower($r[1])])){ //opening tag: unclosed until proven otherwise
                $open[] = array($next_key,$r[1]);
            }
            $next_key++;
            continue;
        }
        // Ordinary text byte (including stray '<' or '>').
        $pieces[$next_key++] = $str[$i];
        $data_len++;
        $i++;
    }
    //Remove every opening tag that never met its closing tag
    foreach($open as $entry){
        unset($pieces[$entry[0]]);
    }
    return implode('',$pieces);
}
?>