按你的思路简单改了下:
function sub_html_restr(str, num)
{
var reg = new RegExp( '<[^>]+>' , 'g' );
var rt, rts = [], indexs = [], tstr, endstr, rstr, sstr, endtag, rtstr, restr;
//提取所有的html标签和标签在字符串中的位置
while ( ( rt = reg.exec(str) ) != null )
{
rts.push(rt[0]);
indexs.push(rt['index']);
}
//删除字符串中所有的html标签
tstr = str.replace(reg, '');
//对剩余的纯字符串进行substr
sstr = tstr.substr(num, tstr.length);
tstr = tstr.substr(0, num);
//判断有没有把实体腰斩,如果有腰斩的就再接上
endstr = (/&[^&]*$/.exec(tstr) || '');
if ( endstr !== '' ) endstr += '' + (/^[^;]*;/.exec(sstr) || '');
if (/^(&\w{1,10};|&#\d+;)$/.test(endstr))
{
rtstr = tstr.replace(/&[^&]*$/, endstr);
}
else
{
rtstr = tstr;
}
//把html标签放回到截断完毕的字符串中,当然有的html标签这时候已经无家可归了
var index = 0;
for (var i = 0; i < rts.length; i ++)
{
index = indexs[i];
if (rtstr.length >= index)
{
rtstr = rtstr.substr(0, index) + rts[i] + rtstr.substr(index, rtstr.length);
}
else
{
break;
}
}
restr = str.substr(rtstr.length,str.length)
//把闭合的标签全部删除
tstr = restr;
rstr = '';
while ( rstr != tstr )
{
rstr = tstr;
tstr = tstr.replace(/<[^\/][^>]*>[^<]*<\/[^>]+>/g, '').replace(/<[^>]+ \/>/g, '');
}
var lastindex = i ;
var endreg = new RegExp('<\/([^>]+)>', 'g');
var tagreg = new RegExp('<?([^ >]+)[ ]?[^ ]*>?');
//如果存在没有闭合的标签,从切断的标签里找上半身
while( (endrt = endreg.exec(tstr))!=null){
for(var i = lastindex-1;i>-1;i--){
if(tagreg.exec(rts[i])[1]==tagreg.exec(endrt[1])[1]){
restr = rts[i] + restr;
lastindex = i;
break;
}
}
}
return restr;
}
</script>
<script>
document.writeln('<textarea cols="100" rows="10">');
document.writeln(sub_html_restr('<xx><oo>嵌套标签截断测试</oo><fk><test>lala</test></fk></xx>', 5));
document.writeln(sub_html_restr('正常字符串测试', 5));
document.writeln(sub_html_restr('<xx>带标签的字符串截断</xx>', 5));
document.writeln(sub_html_restr('<xx><oo>嵌套标签截断测试</oo><fk>lala</fk></xx>', 5));
document.writeln(sub_html_restr('<xx><oo>嵌套标签<img src="
http://www.google.com/logo.gif" />截断测试</oo></xx>', 5));
document.writeln(sub_html_restr('<xx><oo>实体截断 测试测试</oo></xx>', 5));
document.writeln(sub_html_restr('<xx><oo>实体截断 测试测试</oo></xx>', 5));
document.writeln('</textarea>');
</script>
但是,这里有个bug,你没有存储标签的配对,所以遇到document.writeln(sub_html_restr('<xx><oo><oo a=1></oo>截断测试测试</oo></xx>', 5));这种就没办法了.