web-sniffer-js
Version:
Simple HTML Parser to JS Object
1 lines • 4.66 kB
JavaScript
var http=require("http");module.exports=function(){this.htmlObject={},this.clean=function(){this.htmlObject={}},this.clean_line=function(a){if(!a)return a;for(;-1!==a.indexOf(" ");)a=a.replace(" "," ");return a.replace("\t","").replace("\r","").trim()},this.parseWithLink=function(a="",b){let c="";http.get(a,d=>{d.setEncoding("utf8"),d.on("data",function(f){c+=f});var e=this;d.on("end",function(){c=c.replace("\n","").replace("\r",""),c=c.substring(c.indexOf("<html")),e.htmlObject=e.parseHtml(e.clean_line(c)),b(e.htmlObject)})})},this.parseWithFile=function(a="",b){var c=this.clean_line(a.replace("\n","").replace("\r",""));c=c.substring(c.indexOf("<html")),this.htmlObject=this.parseHtml(this.clean_line(c)),b(this.htmlObject)},this.formatTable=function(a){let b=[],c=[];if(a[0]&&"thead"==a[0].name&&a[0].next&&a[0].next[0]&&a[0].next[0].next)for(let d in a[0].next[0].next)b.push(a[0].next[0].next[d].value);if(a[1]&&"tbody"==a[1].name&&a[1].next)for(let d in a[1].next){let e=a[1].next[d],f={};if(e.next){for(let g in e.next)f[b[g]]=e.next[g].value;c.push(f)}}if(a[0]&&"div"==a[0].name&&-1!==a[0].content.indexOf("class=\"thead\"")){for(let d in a[0].next)b.push(a[0].next[d].value);for(let d=1;d<a.length;d++){let e=a[d],f={};if(e.next){for(let g in e.next)f[b[g]]=e.next[g].value;c.push(f)}}}return c},this.readSearchLine=function(a){var b=[];let c=a.split(";");for(let d in c){let e=c[d],f=e.split("|");if(0<f.length){let g={balise:f[0]};if(3==f.length||2==f.length&&-1!==f[1].indexOf("[")&&-1!==f[1].indexOf("]")){let h=f[1].slice(f[1].indexOf("[")+1,f[1].indexOf("]"));g.content=h.split(",")}3==f.length&&-1!==f[2].indexOf("{")&&-1!==f[2].indexOf("}")&&(g.position=parseInt(f[2].slice(f[2].indexOf("{")+1,f[2].indexOf("}")))),2==f.length&&-1!==f[1].indexOf("{")&&-1!==f[1].indexOf("}")&&(g.position=parseInt(f[1].slice(f[1].indexOf("{")+1,f[1].indexOf("}")))),b.push(g)}}return b},this.execSearchFromObj=function(a={},b={}){for(let c in browseObj=a,b){let d=b[c],e=0;d.position&&(e=d.position);let f=-1;for(let g in browseObj){let h=browseObj[g];if(!h)break;if(d.balise==h.name)if(d.content){let j=0;for(let k in d.content)for(let l in h.content)if(d.content[k]==h.content[l]){j++;break}j==d.content.length&&f++}else f++;if(f==e){browseObj=browseObj[g].next;break}}if(f!=e)return!1}return browseObj},this.recusrsiveBrowseObjectSearch=function(a={},b=[]){let c;for(let d in a){let e=a[d],f=this.execSearchFromObj([e],b);if(!1!==f)return f;if(e.next&&(c=this.recusrsiveBrowseObjectSearch(e.next,b),!1!==c))return c}return!1},this.search=function(a=""){var b=this.readSearchLine(a),c=this.recusrsiveBrowseObjectSearch(this.htmlObject,b);return c},this.parseHtml=function(a="",b=0){a=this.clean_line(a);let d=[];if(0>b||0>=a.length||-1===a.indexOf("<"))return null;for(a=a.slice(a.indexOf("<"));0===a.indexOf("<!--");)a=a.slice(a.indexOf("-->")+3);for(a=a.slice(a.indexOf("<"));0===a.indexOf("<script");)a=a.slice(a.indexOf("</script>")+9),a=a.slice(a.indexOf("<"));let e=this.getBaliseInfo(a);if(-1!=e.index){let f=e.index,g=0;if("script"!=e.balise.name){if(!1==e.balise.is_end){g=this.getPosEndOfBalise(a,e),f=g+3+e.balise.name.length;let h=a.slice(e.index+1,g);-1==this.getBaliseInfo(h).index&&(e.balise.value=h),e.balise.next=this.parseHtml(h,b+1),d.push(e.balise);let j=this.parseHtml(a.slice(f),b+1);if(j)for(let k=0;k<j.length;k++)d.push(j[k])}else{f=e.balise.pos_end,d.push(e.balise);let h=this.parseHtml(a.slice(e.balise.pos_end),b+1);if(h)for(let j=0;j<h.length;j++)d.push(h[j])}return d}return null}},this.getPosEndOfBalise=function(a="",b){let c=b.balise.name.length+1,d=a.slice(c),e=0;for(;0<d.length;){let f=d.indexOf("</"+b.balise.name),g=d.indexOf("<"+b.balise.name);if(g<f&&-1!==g)e--,c+=g+1+b.balise.name.length;else{if(e++,1==e)return f+c;c+=3+b.balise.name.length+f}d=a.slice(c)}},this.getBaliseInfo=function(a=""){let b=a.indexOf("<"),c=a.indexOf(" "),d=a.indexOf(">"),e=a.indexOf("/>"),f=!0;(e>d||-1===e)&&(e=d,f=!1),(e<c||-1===c)&&(c=e);let g=a.slice(b+1,c);-1!==["link","input"].indexOf(g)&&(f=!0);let h=a.slice(b+1+g.length,e).trim(),j=null;if(""!=h){j=h.split(" ");let n=[],o=!1,p="";for(let q=0;q<j.length;q++)-1===j[q].indexOf("\"")&&!1==o?n.push(j[q]):j[q].indexOf("\"")!=j[q].lastIndexOf("\"")&&!1==o?n.push(j[q]):(-1!==j[q].indexOf("\"")&&!1==o&&(o=!0),p+=" "+j[q],j[q].lastIndexOf("\"")&&!0==o&&j[q].lastIndexOf("\"")==j[q].length-1&&(o=!1,n.push(p.trim()),p=""));j=n}let k={name:g,content:j,is_end:f};!0==k.is_end&&(k.pos_end=e);var l=a.substring(a.indexOf("</"+k.name+">")+3+k.name.length),m=l.trim();return-1!==l.indexOf("<")&&(m=l.substring(0,l.indexOf("<")).trim()),0<m.length&&(k.str=m),{index:d,balise:k}}};