158 lines
3.7 KiB
JavaScript

// Parser state shared by the helper functions in this file.
// `cursor` is the index into `tr` of the next character to be read.
let cursor;
// `tr` is the text currently being parsed.
let tr;
/**
 * Tell whether `ch` is a whitespace character: space, carriage return,
 * tab or line feed.
 *
 * The empty string is explicitly NOT whitespace. `String.prototype.includes("")`
 * is always true, and `charAt()` yields "" past the end of a string, so
 * without this guard "end of input" would be classified as whitespace.
 * The stray `\H` escape of the previous version (which is just the letter
 * "H") has been removed so that "H" is an ordinary character.
 *
 * @param {string} ch - A single character (or "" for end of input).
 * @returns {boolean} true when `ch` is one of " ", "\r", "\t", "\n".
 */
const is_space = ch => ch !== "" && " \r\t\n".includes(ch);
/**
 * Advance `cursor` past any run of whitespace in `tr`.
 *
 * The loop is bounded by `tr.length`: `charAt()` returns "" past the end of
 * the text, and a whitespace test that accepts "" would otherwise keep the
 * loop running forever.
 */
const eat_space = () => {
  while (cursor < tr.length && is_space(tr.charAt(cursor))) {
    ++cursor;
  }
};
/**
 * Consume the next character and require it to equal `str`.
 *
 * @param {string} str - The single character that must come next.
 * @param {*} ignore_space - Passed through to `next_char`; when truthy,
 *   whitespace before the character is skipped.
 * @throws {SyntaxError} When the character read differs from `str`. The
 *   message carries a short excerpt of the text around the cursor.
 */
const match_next = (str, ignore_space) => {
  const ch = next_char(ignore_space);
  if (ch !== str) {
    // Clamp the start: a negative index would make slice() count from the
    // END of the text and yield an empty or unrelated excerpt.
    const context = tr.slice(Math.max(0, cursor - 10), cursor + 20);
    throw new SyntaxError(`mismatch symbol, expected ${str}, got ${ch} pos: ${context}`);
  }
};
/**
 * Read one character from `tr` at `cursor` and advance past it.
 *
 * @param {*} ignore_space - When truthy, whitespace is skipped first and the
 *   first non-whitespace character is returned.
 * @returns {string} The character that was consumed.
 * @throws {SyntaxError} When no character is left to read. This includes the
 *   case where only whitespace remains and `ignore_space` is set, which
 *   previously never terminated.
 */
const next_char = ignore_space => {
  if (ignore_space) {
    // Bounded skip: never walk past the end of the text.
    while (cursor < tr.length && is_space(tr.charAt(cursor))) {
      ++cursor;
    }
  }
  if (cursor >= tr.length) {
    throw new SyntaxError(`invalid end of file`);
  }
  return tr.charAt(cursor++);
};
/**
 * Read a token (a tag or attribute name) starting at the cursor.
 *
 * Characters are consumed until whitespace, "=", ">", "/" or the end of the
 * input is reached; the terminating character itself is not consumed.
 *
 * @returns {string} The token text (never empty).
 * @throws {SyntaxError} When no token character is available at the cursor.
 */
const read_token = () => {
  const chars = [];
  for (;;) {
    const ch = peek_char(1);
    // `ch === ""` is end of input. Checked explicitly so termination does
    // not depend on how is_space() happens to classify the empty string.
    if (ch === "" || is_space(ch) || ch === "=" || ch === ">" || ch === "/") {
      break;
    }
    chars.push(ch);
    next_char();
  }
  if (chars.length === 0) {
    throw new SyntaxError(`invalid token without any char`);
  }
  return chars.join("");
};
/**
 * Look ahead in `tr` without moving `cursor`.
 *
 * @param {number} n - How many characters to return.
 * @param {*} [ignore_space=0] - When truthy, whitespace characters are
 *   skipped and do not count towards `n`.
 * @returns {string} Up to `n` characters. The result is shorter than `n`
 *   (possibly "") when the input runs out first.
 */
const peek_char = (n, ignore_space = 0) => {
  if (cursor >= tr.length) {
    return "";
  }
  if (!ignore_space) {
    return tr.slice(cursor, cursor + n);
  }
  const chars = [];
  // Bounded by tr.length: past the end charAt() yields "", which must not be
  // mistaken for skippable whitespace (that used to loop forever).
  for (let t = cursor; t < tr.length && chars.length < n; ++t) {
    const ch = tr.charAt(t);
    if (!is_space(ch)) {
      chars.push(ch);
    }
  }
  return chars.join("");
};
/**
 * Read a quoted attribute value starting at the cursor.
 *
 * Leading whitespace is skipped. The value may be delimited by double quotes
 * or, as XML also allows, by single quotes. Inside the value a backslash
 * followed by the active quote character yields a literal quote; every other
 * character (including a lone backslash) is taken verbatim.
 *
 * @returns {string} The attribute value without the surrounding quotes.
 * @throws {SyntaxError} From `match_next` when the value does not start with
 *   a quote; whatever `next_char` raises when the closing quote is missing.
 */
const read_attr = () => {
  // Anything other than a single quote goes through the double-quote path,
  // so the mismatch error for malformed input is raised exactly as before.
  const quote = peek_char(1, 1) === "'" ? "'" : '"';
  match_next(quote, 1);
  const chars = [];
  for (;;) {
    const ch = next_char();
    if (ch === quote) {
      break;
    }
    if (ch === "\\" && peek_char(1) === quote) {
      // Escaped quote: drop the backslash, keep the quote.
      chars.push(next_char());
    } else {
      chars.push(ch);
    }
  }
  return chars.join("");
};
/**
 * Read a run of text content.
 *
 * Whitespace in front of the text is skipped; the first non-whitespace
 * character and everything after it, up to but not including the next "<",
 * is returned. Whitespace between the text and that "<" is kept.
 *
 * @returns {string} The text that was consumed.
 */
const read_text = () => {
  const pieces = [next_char(1)];
  while (peek_char(1) !== "<") {
    pieces.push(next_char());
  }
  return pieces.join("");
};
/**
 * Parse one element starting at the cursor, including all of its children.
 *
 * Grammar handled here:
 *   "<" name (attr | attr "=" value)* ( "/>" | ">" (element | text)* "</" name ">" )
 * Whitespace is allowed on either side of the "=" of an attribute.
 *
 * @returns {{name: string, attributes: Object, children: Array}} The element.
 *   `attributes` maps each attribute name to its string value, or to `true`
 *   when the attribute has no value. `children` holds nested element objects
 *   and text strings in document order.
 * @throws {SyntaxError} On malformed markup, on a closing tag whose name does
 *   not match the opening tag, or when the input ends inside the element.
 */
const read_node = () => {
  const result = {name: "", attributes: {}, children: []};
  match_next("<", 1);
  result.name = read_token();

  // Attribute list: runs until the tag is closed by "/" or ">".
  let peekch = peek_char(1, 1);
  while (peekch !== "/" && peekch !== ">") {
    if (peekch === "") {
      // Input ended inside the opening tag.
      throw new SyntaxError(`invalid end of file`);
    }
    eat_space();
    const token = read_token();
    // Look past whitespace so that `name = "value"` is accepted as well as
    // `name="value"`.
    if (peek_char(1, 1) === "=") {
      next_char(1);
      result.attributes[token] = read_attr();
    } else {
      result.attributes[token] = true;
    }
    peekch = peek_char(1, 1);
  }

  if (peekch === "/") {
    // Self-closing element: no children.
    match_next("/", 1);
    match_next(">");
    return result;
  }

  // The loop above only exits on "/" or ">", so this consumes the ">".
  next_char(1);

  const children = [];
  for (;;) {
    if (peek_char(2, 1) === "</") {
      next_char(1);
      next_char(1);
      const close_name = read_token();
      match_next(">", 1);
      if (close_name !== result.name) {
        throw new SyntaxError(`mismatched close tag </${close_name}>, expected </${result.name}>`);
      }
      break;
    }
    const upcoming = peek_char(1, 1);
    if (upcoming === "") {
      // Input ended before the closing tag.
      throw new SyntaxError(`invalid end of file`);
    }
    if (upcoming === "<") {
      children.push(read_node());
    } else {
      children.push(read_text());
    }
  }
  result.children = children;
  return result;
};
/**
 * Remove every `<!-- ... -->` comment from a piece of markup.
 *
 * The text is scanned once from left to right; the pieces between comments
 * are collected and joined, so the cost is linear in the input length.
 *
 * @param {string} tr - Markup that may contain comments.
 * @returns {string} The markup with all comments removed.
 * @throws {SyntaxError} When a comment is opened but never closed. Previously
 *   this case grew the string without bound in an endless loop.
 */
const trim_comment = tr => {
  const kept = [];
  let from = 0;
  let pos = tr.indexOf("<!--");
  while (pos >= 0) {
    const end = tr.indexOf("-->", pos + 4);
    if (end < 0) {
      throw new SyntaxError(`unterminated comment pos: ${tr.slice(pos, pos + 20)}`);
    }
    kept.push(tr.slice(from, pos));
    from = end + 3;
    pos = tr.indexOf("<!--", from);
  }
  kept.push(tr.slice(from));
  return kept.join("");
};
/**
 * Parse a document and return its first element.
 *
 * Resets the shared cursor, strips processing instructions (`<? ... ?>`),
 * the `<!DOCTYPE ...>` declaration and comments from the input, then hands
 * the remaining text to `read_node`.
 *
 * @param {string} r - The raw document text.
 * @returns {{name: string, attributes: Object, children: Array}} The element
 *   object produced by `read_node`.
 * @throws {SyntaxError} Propagated from the parsing helpers on malformed input.
 */
const start = r => {
  cursor = 0;
  tr = r
    // Lazy and newline-aware: stop at the FIRST "?>" so markup that follows
    // on the same line is kept, and allow the instruction to span lines.
    .replace(/<\?[\s\S]*?\?>/g, "")
    // Stop at the ">" that closes the declaration (after an optional
    // [...] internal subset) instead of at the last ">" on the line.
    .replace(/<!DOCTYPE[^>\[]*(?:\[[\s\S]*?\])?[^>]*>/g, "");
  tr = trim_comment(tr);
  return read_node();
};
// The module's export is the `start` function itself.
module.exports = start