1. 昨天做 no_iter 的时候,漏掉了 forEach。

2. addString 虽然可以复用旧字符串,但也有局限:如果先放入 'x' 再放入 'xx',虽然 'x' 是 'xx' 的一部分,空间却没有被充分利用。因此修改了 addString 的方案:先收集字符串,收集完成后按长度降序排序,再统一 addString 并设置 cache,这样后续使用时就可以复用空间了。这也确实让 acll 文件的大小降低了。
This commit is contained in:
a92126 2024-10-16 11:28:43 +08:00
parent 3ad07f892f
commit a095a748a3

View File

@ -19,7 +19,6 @@ const a = "a".charCodeAt();
const f = "f".charCodeAt();
const conv = new TextEncoder();
const noiter = process.env.OAFCHT_NO_ITER==='1';
const noreuse = process.env.OAFCHT_NO_REUSE==='1';
/**
* 这个函数做了一些数据复用处理,能省下来一点文件容量
@ -27,9 +26,6 @@ const noreuse = process.env.OAFCHT_NO_REUSE==='1';
* 具体来说,字符串表里面如果有两个字符串 '1' 和 '11',
* 那么在字符串表里面只会有一个索引指向 '11';
* '1' 复用 '11' 的数据,方案是把索引的位置放到 '11' 的最后一个 1 前面
*
* 当然这样做有点吃cpu如果不希望使用这个方式可以配置
* 环境变量 OAFCHT_NO_REUSE=1 将会产生一个更快的过程
*/
const findIndex = (t, tr, tlen) => {
const rlen = tr.length, limit = tlen - rlen;
@ -51,7 +47,7 @@ const findIndex = (t, tr, tlen) => {
const read = (cht, info, order) => {
let state = WAIT_LOCK, token = [], line = 1;
let locks = [], currlock = null
let locks = [], currlock = null;
const retval = [];
const token_str = () => { const r = token.join(""); return (token = [], r); }
@ -313,12 +309,23 @@ const pack = (entlist, strlist, idxlist, cmdlist, xvalue) => {
view.setUint16(0, entlist.length, true);
// entry data
entlist.forEach((entry, index) => {
let offset = 2 + index * entrysize;
view.setUint32(offset, entry[0], true);
view.setUint32(offset + 4, entry[1] + (entry[0] > 0xffff ? cmdbase : idxbase), true);
view.setUint32(offset + 8, entry[2], true);
});
if( noiter === false ){
entlist.forEach((entry, index) => {
let offset = 2 + index * entrysize;
view.setUint32(offset, entry[0], true);
view.setUint32(offset + 4, entry[1] + (entry[0] > 0xffff ? cmdbase : idxbase), true);
view.setUint32(offset + 8, entry[2], true);
});
}
else {
for( let i=0; i < entlist.length; ++i ){
const entry = entlist[i];
let offset = 2 + i * entrysize;
view.setUint32(offset, entry[0], true);
view.setUint32(offset + 4, entry[1] + (entry[0] > 0xffff ? cmdbase : idxbase), true);
view.setUint32(offset + 8, entry[2], true);
}
}
// string table
let strings = new Uint8Array(result, strbase, strlist.length);
@ -337,35 +344,62 @@ const pack = (entlist, strlist, idxlist, cmdlist, xvalue) => {
const lade = (list, info, order) => {
let enttable = []; // 保存hole/key数据
let rootstr = [], names = [], dummy = [];
const makeID = (a, b) => a | b << 16;
const makeEntry = (id, str) => ({ id, data: str });
// collect
const collectEntry = (id, keys, list) => {
const collectEntry = noiter === false ? (id, keys, list, colname) => {
keys.forEach(({ name, cmd }, index) => {
list.push(name);
list[index] = name;
if( colname ) names.push( name );
enttable.push(makeEntry(makeID(id, index + 1), cmd));
});
} : (id, keys, list, colname) => {
for( let i=0; i < keys.length; ++i ){
const { name, cmd } = keys[i];
list[i] = name;
if( colname ) names.push( name );
enttable.push(makeEntry(makeID(id, i + 1), cmd));
}
};
if( noiter === false ){
list.forEach((hole, index) => {
rootstr.push(hole.name);
names.push(hole.name);
let holestr = new Array(hole.keys.length);
collectEntry(index + 1, hole.keys, holestr, hole.keys.length > 1);
let entry = makeEntry(makeID(index + 1, 0), holestr);
enttable.push(entry);
});
}
else {
for( let i=0; i < list.length; ++i ){
const hole = list[i];
rootstr.push(hole.name);
names.push(hole.name);
let holestr = new Array(hole.keys.length);
collectEntry(i + 1, hole.keys, holestr, hole.keys.length > 1);
let entry = makeEntry(makeID(i + 1, 0), holestr);
enttable.push(entry);
}
}
let rootstr = [];
list.forEach((hole, index) => {
rootstr.push(hole.name);
let holestr = [];
collectEntry(index + 1, hole.keys, holestr);
let entry = makeEntry(makeID(index + 1, 0), holestr);
enttable.push(entry);
});
enttable.push(makeEntry(makeID(0, 0), rootstr));
enttable.sort((a, b) => a.id - b.id);
names.sort( (a,b) => b.length - a.length );
let strtable = new Uint8Array(1024); // 保存字符串常量
let idxtable = new Uint16Array(128); // 从字符串常量索引出来的字符串列表
let cmdtable = new Uint32Array(4096); // 保存选项对应指令
const strOffsetCache = {};
const size = {"str": 0, "cmd": 0, "idx": 0};
const addString = list => {
const ut_pushtr = tr => {
@ -383,21 +417,17 @@ const lade = (list, info, order) => {
size.str = len;
return off;
};
const ut_addtr = noreuse ? (() => {
const cache = {};
return tr => {// 不要直接ut_pushtr因为有些秘籍会撑爆字符串表
if (cache[tr] != undefined) return cache[tr];
else {
const code = conv.encode(tr);
const off = ut_pushtr(code);
cache[tr] = off;
return off;
}
const ut_addtr = tr => {
if (strOffsetCache[tr] != undefined) {
return strOffsetCache[tr];
}
else {
const code = conv.encode(tr);
let off = findIndex(strtable, code, size.str);
off = off < 0 ? ut_pushtr(code) : off;
strOffsetCache[tr] = off;
return off;
}
})() : tr => {
const code = conv.encode(tr);
const off = findIndex(strtable, code, size.str);
return off < 0 ? ut_pushtr(code) : off;
};
// 避免使用迭代器方法减少CPU已走过performance对比流程
// ---------------原来的代码在这里------------------
@ -461,6 +491,7 @@ const lade = (list, info, order) => {
// 避免使用迭代器方法减少CPU已走过performance对比流程
// ---------------原来的代码在这里------------------
let entbytelist;
addString(names); //
if( noiter === false ){
entbytelist = enttable.map(entry => {
let { id, data } = entry;