mirror of
https://gitee.com/anod/open_agb_firm.git
synced 2025-05-06 13:54:09 +08:00
158 lines
3.7 KiB
JavaScript
158 lines
3.7 KiB
JavaScript
|
|
// Parser state shared by every helper in this file; both are (re)set by `start`.
// `tr` is the text being parsed, `cursor` is the index of the next unread character in `tr`.
let cursor;
let tr;
|
|
|
|
/**
 * Tells whether `ch` is one of the whitespace characters the parser skips
 * (space, carriage return, tab, line feed).
 *
 * The empty string is NOT whitespace: `charAt` past the end of the input and
 * `peek_char` at end of input both yield "", and treating that as a space
 * would make every "skip whitespace" loop spin forever.
 *
 * @param {string} ch - a single character (or "" at end of input)
 * @returns {boolean} true when `ch` is a whitespace character
 */
const is_space = ch => ch !== "" && " \r\t\n".includes(ch);
|
|
|
|
/**
 * Advances `cursor` past any run of whitespace in `tr`.
 * Stops at the first non-space character or at the end of the input,
 * whichever comes first.
 */
const eat_space = () => {
  // The explicit bound keeps the loop finite even if is_space("") is truthy.
  while (cursor < tr.length && is_space(tr.charAt(cursor))) {
    ++cursor;
  }
};
|
|
|
|
/**
 * Consumes the next character and checks that it is exactly `str`.
 *
 * @param {string} str - the single character that must come next
 * @param {*} ignore_space - when truthy, whitespace is skipped before the character is read
 * @throws {SyntaxError} when the character read differs from `str`; the message
 *   carries a snippet of the input around the failure position
 */
const match_next = (str, ignore_space) => {
  const ch = next_char(ignore_space);
  if (ch !== str) {
    // Clamp the start: a negative index would make slice() count from the end
    // of the input and produce an empty or unrelated snippet.
    const context = tr.slice(Math.max(0, cursor - 10), cursor + 20);
    throw new SyntaxError(`mismatched symbol, expected ${str}, got ${ch} pos: ${context}`);
  }
};
|
|
|
|
/**
 * Reads one character from `tr` at `cursor` and advances `cursor` past it.
 *
 * @param {*} ignore_space - when truthy, whitespace characters are consumed
 *   and skipped, and the first non-space character is returned
 * @returns {string} the character that was read
 * @throws {SyntaxError} when the input is exhausted before a character
 *   (a non-space character in skip mode) is found
 */
const next_char = ignore_space => {
  if (cursor >= tr.length) {
    throw new SyntaxError(`invalid end of file`);
  }

  if (!ignore_space) {
    return tr.charAt(cursor++);
  }

  // Bounded scan: trailing whitespace must end in an error, not an endless loop.
  while (cursor < tr.length) {
    const ch = tr.charAt(cursor++);
    if (!is_space(ch)) {
      return ch;
    }
  }
  throw new SyntaxError(`invalid end of file`);
};
|
|
|
|
/**
 * Reads a name token (tag name or attribute name) starting at `cursor`.
 *
 * Characters are consumed until a delimiter is seen: whitespace, "=", ">",
 * "/", or the end of the input. The delimiter itself is left unread.
 *
 * @returns {string} the token text (never empty)
 * @throws {SyntaxError} when a delimiter is found before any character
 */
const read_token = () => {
  const chs = [];
  for (;;) {
    const ch = peek_char(1);
    // peek_char returns "" at end of input; treat that as a delimiter
    // explicitly rather than relying on how is_space handles "".
    if (ch === "" || is_space(ch) || ch === "=" || ch === ">" || ch === "/") {
      break;
    }
    chs.push(ch);
    next_char();
  }

  if (chs.length === 0) {
    throw new SyntaxError(`invalid token without any char`);
  }
  return chs.join("");
};
|
|
|
|
/**
 * Looks ahead in `tr` from `cursor` without moving `cursor`.
 *
 * @param {number} n - how many characters to return
 * @param {*} [ignore_space=0] - when truthy, whitespace is skipped and the
 *   next `n` non-space characters are returned
 * @returns {string} up to `n` characters; shorter (possibly "") when the
 *   input ends first
 */
const peek_char = (n, ignore_space = 0) => {
  if (cursor >= tr.length) {
    return "";
  }

  if (!ignore_space) {
    return tr.slice(cursor, cursor + n);
  }

  const chs = [];
  // Bounded by the input length so a tail of pure whitespace ends the scan
  // instead of looping forever.
  for (let t = cursor; t < tr.length && chs.length < n; ++t) {
    const ch = tr.charAt(t);
    if (!is_space(ch)) {
      chs.push(ch);
    }
  }
  return chs.join("");
};
|
|
|
|
/**
 * Reads a double-quoted attribute value.
 *
 * Leading whitespace before the opening quote is skipped. Inside the value a
 * backslash immediately followed by a double quote yields a literal double
 * quote; any other backslash is kept as-is. The closing quote is consumed.
 *
 * @returns {string} the value without its surrounding quotes
 */
const read_attr = () => {
  match_next('"', 1);

  const value = [];
  for (;;) {
    const current = next_char();
    if (current === '"') {
      break;
    }
    if (current === "\\" && peek_char(1) === '\"') {
      // Escaped quote: drop the backslash, keep the quote.
      value.push(next_char());
      continue;
    }
    value.push(current);
  }
  return value.join("");
};
|
|
|
|
/**
 * Reads a run of character data up to (not including) the next "<".
 *
 * Whitespace before the first character is skipped; whitespace after it is
 * kept verbatim.
 *
 * @returns {string} the text that was read
 */
const read_text = () => {
  const pieces = [next_char(1)];
  while (peek_char(1) !== "<") {
    pieces.push(next_char());
  }
  return pieces.join("");
};
|
|
|
|
/**
 * Parses one element starting at `cursor` (leading whitespace is skipped),
 * recursing into its child elements.
 *
 * @returns {{name: string, attributes: Object, children: Array}} the element:
 *   `attributes` maps each attribute name to its string value, or to `true`
 *   when the attribute has no "=value" part; `children` holds nested element
 *   objects and text strings in document order (empty for `<x/>`).
 * @throws {SyntaxError} on malformed markup, a closing tag whose name does
 *   not match the opening tag, or input that ends inside the element
 */
const read_node = () => {
  const result = {name: "", attributes: {}, children: []};

  match_next("<", 1);
  result.name = read_token();

  // Attribute list: runs until the tag ends with "/" (self-closing) or ">".
  let peekch = peek_char(1, 1);
  while (peekch !== "/" && peekch !== ">") {
    if (peekch === "") {
      // Input ended inside the start tag; fail instead of scanning past the end.
      throw new SyntaxError(`invalid end of file`);
    }
    eat_space();
    const token = read_token();
    // Look past whitespace so that `name = "value"` is accepted as well as
    // `name="value"`.
    if (peek_char(1, 1) === "=") {
      match_next("=", 1);
      result.attributes[token] = read_attr();
    } else {
      result.attributes[token] = true;
    }
    peekch = peek_char(1, 1);
  }

  if (peekch === "/") {
    match_next("/", 1);
    match_next(">");
    return result;
  }

  // peekch is ">" here: consume it and read the element content.
  next_char(1);

  const children = [];
  while (true) {
    if (peek_char(2, 1) === "</") {
      next_char(1);
      next_char(1);
      const close_name = read_token();
      match_next(">", 1);
      if (close_name !== result.name) {
        throw new SyntaxError(`mismatched close tag: expected </${result.name}>, got </${close_name}>`);
      }
      break;
    } else if (peek_char(1, 1) === "<") {
      children.push(read_node());
    } else {
      children.push(read_text());
    }
  }

  result.children = children;
  return result;
};
|
|
|
|
/**
 * Returns `tr` with every `<!-- ... -->` comment removed.
 *
 * The input is scanned once from left to right; the text between comments is
 * copied through unchanged.
 *
 * @param {string} tr - the text to strip comments from
 * @returns {string} the text without comments
 * @throws {SyntaxError} when a comment is opened but never closed
 */
const trim_comment = tr => {
  const kept = [];
  let from = 0;
  let pos = tr.indexOf("<!--");
  while (pos >= 0) {
    const end = tr.indexOf("-->", pos + 4);
    if (end < 0) {
      // Without this check `end + 3` would be 2 and the splice would duplicate
      // input instead of removing it, possibly growing the string forever.
      throw new SyntaxError(`unterminated comment pos: ${tr.slice(pos, pos + 20)}`);
    }
    kept.push(tr.slice(from, pos));
    from = end + 3;
    pos = tr.indexOf("<!--", from);
  }
  kept.push(tr.slice(from));
  return kept.join("");
};
|
|
|
|
/**
 * Entry point: parses the text `r` and returns its root element.
 *
 * Processing instructions (`<? ... ?>`), the DOCTYPE declaration and comments
 * are stripped before parsing; the parser state (`cursor`, `tr`) is reset.
 *
 * @param {string} r - the document text
 * @returns {Object} the root node produced by `read_node`
 */
const start = r => {
  cursor = 0;
  tr = r
    // Lazy match: a greedy `.*` would swallow everything up to the LAST "?>"
    // on the line, including real markup that sits between two instructions.
    .replace(/<\?.*?\?>/g, "")
    // Stop at the declaration's own ">" (allowing one optional [...] internal
    // subset) instead of the last ">" on the line, and allow line breaks.
    .replace(/<!DOCTYPE[^>\[]*(\[[^\]]*\])?[^>]*>/g, "");
  tr = trim_comment(tr);
  return read_node();
};
|
|
|
|
// The module's only export is the `start` entry point; call it with the
// document text to get the parsed root node.
module.exports = start
|