a92126 9acee9b827 一句代码让rust版本提速16%
其实就是补缺一句capacity的内容
2025-04-03 15:15:40 +08:00

787 lines
22 KiB
Rust

mod utf8view;
use std::fs;
use serde_json::Value;
use byteorder::{LittleEndian, ByteOrder};
use std::collections::BTreeMap;
use utf8view::Utf8CharIter;
/// Packed binary image for one group of locks, as returned by `pack`/`lade`.
type Module = Vec<u8>;
/// One command of a hole: a list of text tokens. `assemble_cheat` reads the
/// first token as a hexadecimal address and the remaining ones as hexadecimal values.
type LockKey = Vec<String>;
/// A named hole together with the commands collected for it.
type LockHole = (String, Vec< LockKey >);
/// A `[name]` section of the input file and the holes parsed beneath it.
#[derive(Default)]
struct Lock {
/// Text found between `[` and `]`.
name: String,
/// Holes in the order they were pushed by the parser.
holes: Vec< LockHole >,
}
/// States of the byte-driven parser implemented in `incr`.
#[derive(Debug)]
enum ParserState {
/// Initial state: waiting for the first `[`.
WaitLock,
/// Inside `[...]`, accumulating the lock name.
ReadLock,
/// A `]` was seen; waiting for the end of that line.
WaitHole,
/// At the start of a line, accumulating a hole name (or a continuation token).
ReadHole,
/// After `=`, accumulating the tokens of the current hole.
ReadKey,
/// Entered after a `[gameinfo]` header; most input is ignored in this state.
WaitPart,
/// A newline was seen while in `WaitPart`; a `[` here starts a new lock.
NeedPart,
}
/// Hole names (compared in lowercase) whose contents are not stored in the lock.
const PASSHOLE: [&str; 2] = ["text", "off"];
/// Bytes accepted as hexadecimal digits while reading key tokens.
const HEX: [u8; 22] = [b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'A', b'b', b'B', b'c', b'C', b'd', b'D', b'e', b'E', b'f', b'F'];
/// Mutable state threaded through `incr` while one input file is parsed.
struct ParserCtx<'a>{
/// Current state of the state machine.
state: ParserState,
/// Bytes of the token currently being accumulated.
token: Vec<u8>,
/// Current line number (incremented on every `\n`), used in error messages.
line: u32,
/// Identifier of the input being parsed; shown in error messages and passed to `lade`.
file: &'a String,
/// Value derived from the serial in `parse`; passed to `lade` as the XOR seed.
code: u32,
/// Locks completed since the last `[gameinfo]` header.
locks: Vec< Lock >,
/// Lock currently being filled, if any.
currlock: Option<Lock>,
/// Hole currently being filled, if any.
currhole: Option<LockHole>,
/// Receives one `(id, module)` pair each time a `[gameinfo]` header is reached.
output: &'a mut Vec< (u32, Module) >,
}
/// Returns the bytes buffered in `ctx.token` as a `String` and empties the buffer.
///
/// An empty buffer yields an empty string. Panics if the buffered bytes are
/// not valid UTF-8.
fn token_str( ctx: &mut ParserCtx ) -> String {
    if ctx.token.is_empty() {
        return String::new();
    }
    let text = String::from_utf8(ctx.token.clone()).unwrap();
    ctx.token.clear();
    text
}
/// Aborts parsing by panicking with `str`, the current line number, the name
/// of the lock being read (or `<nolock>` when there is none) and the file id.
fn error( ctx: &ParserCtx, str: String ) {
    match &ctx.currlock {
        Some(lock) => {
            panic!("{} at line: {} in lock {:?} [{}]", str, ctx.line, lock.name, ctx.file)
        }
        None => {
            panic!("{} at line: {} in lock {:?} [{}]", str, ctx.line, "<nolock>".to_string(), ctx.file)
        }
    }
}
/// Payload of one entry collected by `lade` before it is serialized.
enum EntData<'a> {
/// A list of borrowed strings (lock names or hole names).
Names(Vec<&'a String>),
/// The commands of a single hole, handed to `assemble_cheat`.
Cmds(&'a Vec<LockKey>),
}
/// Tables built up by `lade` and finally laid out by `pack`.
struct LadeCtx<'a> {
/// NUL-terminated strings stored back to back (see `ut_push`).
strtable: Vec<u8>,
/// Offsets into `strtable`, appended by `add_str_index`.
idxtable: Vec<u16>,
/// 32-bit words appended by `add_command`.
cmdtable: Vec<u32>,
/// Maps an already-handled string to its offset in `strtable`.
strcache: BTreeMap<&'a String, u16>,
}
/// Combines `a` (low 16 bits) and `b` (shifted left by 16) into one entry id.
///
/// Panics if the combined value does not fit in a `u32`.
#[inline(always)]
fn make_id( a: usize, b: usize ) -> u32 {
    let packed: usize = (b << 16) | a;
    packed.try_into().unwrap()
}
/// Searches the string table `t` for the bytes `tr` immediately followed by a
/// NUL byte and returns the offset of the first such occurrence, or `-1` when
/// there is none.
///
/// The match may start in the middle of a stored string, so a string that is a
/// suffix of an earlier one is found inside it.
///
/// Offsets larger than `i16::MAX` cannot be expressed in the return type; such
/// positions are not searched and `-1` is returned instead of panicking, which
/// makes the caller append a fresh copy of the string.
fn find_index( t: &Vec<u8>, tr: &[u8] ) -> i16 {
    let rlen = tr.len();
    if rlen > t.len() {
        return -1;
    }
    // Highest start offset (exclusive) that leaves room for the terminator and
    // still fits in an i16.
    let limit = (t.len() - rlen).min(i16::MAX as usize + 1);
    for i in 0..limit {
        if t[i + rlen] == 0 && t[i..i + rlen] == *tr {
            // Cannot fail: `i` is at most i16::MAX because of `limit`.
            return i.try_into().unwrap();
        }
    }
    -1
}
/// Appends `tr` plus a terminating NUL byte to the table `t` and returns the
/// offset at which the string starts.
///
/// Panics if that offset does not fit in a `u16`.
fn ut_push( t: &mut Vec<u8>, tr: &[u8] ) -> u16 {
    let start = t.len();
    t.reserve(tr.len() + 1);
    t.extend(tr.iter().copied().chain(std::iter::once(0u8)));
    start.try_into().unwrap()
}
/// Makes sure every string of `list` is present in `ctx.strtable` and returns
/// the table offset of each one, in the same order as `list`.
///
/// Offsets are looked up in `ctx.strcache` first, then searched for in the
/// table with `find_index`, and only appended with `ut_push` when not found.
/// Newly resolved offsets are remembered in the cache.
fn add_string<'a>( list: &Vec<&'a String>, ctx: &mut LadeCtx<'a> ) -> Vec<u16> {
    let mut offsets: Vec<u16> = Vec::with_capacity(list.len());
    for &name in list {
        let offset = match ctx.strcache.get(&name).copied() {
            Some(known) => known,
            None => {
                let bytes: &[u8] = name.as_bytes();
                let found: i16 = find_index(&ctx.strtable, bytes);
                let fresh: u16 = if found < 0 {
                    ut_push(&mut ctx.strtable, bytes)
                } else {
                    found.try_into().unwrap()
                };
                ctx.strcache.insert(name, fresh);
                fresh
            }
        };
        offsets.push(offset);
    }
    offsets
}
/// Appends the offsets in `list` to `ctx.idxtable` and returns the byte
/// position (two bytes per slot) at which they start.
///
/// An empty `list` appends nothing and returns 0. Panics if the string table
/// has grown beyond 0xffff bytes.
fn add_str_index( list: &Vec<u16>, ctx: &mut LadeCtx ) -> u32 {
    if list.is_empty() {
        return 0;
    }
    if ctx.strtable.len() > 0xffff {
        panic!("string table is too big.")
    }
    let first_slot: u32 = ctx.idxtable.len().try_into().unwrap();
    ctx.idxtable.extend_from_slice(list);
    first_slot * 2
}
/// Turns the commands of one hole into a flat list of `(address, length/value)`
/// word pairs.
///
/// Each non-empty command is `[address, value, value, ...]` in hexadecimal; the
/// values are assigned to consecutive addresses starting at the translated
/// address. Later assignments to the same address replace earlier ones.
/// Consecutive addresses holding the same value are merged into one run, and
/// every run is emitted as two words: the start address and
/// `(run_length << 8) | value`.
///
/// Every address written is also recorded in `dup` with the id `hole`.
/// `order` is only used in panic messages.
///
/// Panics when an address or value is not valid hexadecimal, or when an
/// address is 0 or greater than 0x41fffff.
fn assemble_cheat( list: &Vec<LockKey>, dup: &mut BTreeMap<u32, u16>, hole: u16, order: &String ) -> Vec<u32> {
    // Parses a textual address and maps it into the target address space.
    let resolve_addr = |text: &str| -> u32 {
        let raw = match u32::from_str_radix(text, 16) {
            Ok(n) => n,
            Err(_) => panic!("get an invalid address: {} in {}", text, order),
        };
        if raw == 0 || raw > 0x41fffff {
            panic!("get an invalid address: {}", text);
        }
        if raw < 0x3ffff {
            raw | 0x200_0000
        } else if (raw & 0xf000000) == 0 {
            (raw & 0xffff) | 0x3000000
        } else {
            raw
        }
    };

    // address -> value, kept sorted by address.
    let mut memory: BTreeMap<u32, u32> = BTreeMap::new();
    for command in list {
        let (head, values) = match command.split_first() {
            Some(parts) => parts,
            None => continue,
        };
        let base = resolve_addr(head.as_str());
        let count: u32 = values.len().try_into().unwrap();
        for (pos, text) in (0..count).zip(values) {
            let value = match u32::from_str_radix(text, 16) {
                Ok(v) => v,
                Err(_) => panic!("cannot get valid cheat val {} in {}", text, order),
            };
            memory.insert(base + pos, value);
        }
    }

    // Run-length encode the address-ordered assignments.
    let mut blocks: Vec<u32> = Vec::new();
    let mut run: Option<(u32, u32, u32)> = None; // (start, value, length)
    for (&addr, &value) in &memory {
        dup.insert(addr, hole);
        run = match run {
            Some((start, held, len)) if held == value && start + len == addr => {
                Some((start, held, len + 1))
            }
            Some((start, held, len)) => {
                blocks.push(start);
                blocks.push((len << 8) | held);
                Some((addr, value, 1))
            }
            None => Some((addr, value, 1)),
        };
    }
    if let Some((start, held, len)) = run {
        blocks.push(start);
        blocks.push((len << 8) | held);
    }
    blocks
}
/// Appends the words in `list` to `ctx.cmdtable` and returns the byte position
/// (four bytes per word) at which they start.
fn add_command( list: &Vec<u32>, ctx: &mut LadeCtx ) -> u32 {
    let first_word: u32 = ctx.cmdtable.len().try_into().unwrap();
    ctx.cmdtable.extend_from_slice(list);
    first_word * 4
}
/// Builds the binary module for a list of locks.
///
/// Returns `(xv, bin)`, where `bin` is the image produced by `pack` and `xv`
/// is `code` XOR-ed with every word of the command table. `order` is only used
/// in panic messages (directly and via `assemble_cheat`).
///
/// Panics if more than 0xffff entries are produced.
fn lade( list: &Vec<Lock>, code: u32, order: &String ) -> (u32, Module) {
// Names of all locks, in input order (payload of the root entry).
let mut rootstr: Vec<&String> = Vec::new();
// Every string that is interned up front: lock names, plus hole names of
// locks that have more than one hole.
let mut names: Vec<&String> = Vec::new();
let mut enttable: Vec<(u32, EntData)> = Vec::new();
for (i, lock) in list.iter().enumerate() {
rootstr.push( &lock.name );
names.push( &lock.name );
let mut holestr: Vec<&String> = Vec::with_capacity( lock.holes.len() );
let colname = lock.holes.len() > 1;
for (index, (name, cmd)) in lock.holes.iter().enumerate() {
holestr.push( name );
if colname { names.push( name ); }
// Entry id: lock number (1-based) in the low half, hole number (1-based) in the high half.
enttable.push( (make_id(i+1, index+1), EntData::Cmds(&cmd)) );
}
// Per-lock entry (hole number 0) listing the lock's hole names.
enttable.push( (make_id(i+1, 0), EntData::Names(holestr)) );
}
// Root entry (id 0) listing all lock names.
enttable.push( (make_id(0, 0), EntData::Names(rootstr) ) );
if enttable.len() > 0xffff {
panic!("entry count is overflow in file {}", order);
}
enttable.sort_by( |a, b| a.0.cmp( &b.0 ) );
// Longest strings first, ties in lexicographic order; presumably so that
// shorter strings can be found as suffixes of strings already stored
// (see `find_index`).
names.sort_by( |a, b| if a.len() == b.len() { a.cmp(&b) } else { b.len().cmp( &a.len() ) } );
let mut ctx: LadeCtx = LadeCtx{
strtable: Vec::new(),
idxtable: Vec::new(),
cmdtable: Vec::new(),
strcache: BTreeMap::new(),
};
// Fill the string table in the sorted order; the returned offsets are not needed here.
add_string(&names, &mut ctx);
// (id, location, count) triples handed to `pack`.
let mut entbytelist: Vec<(u32, u32, u32)> = Vec::with_capacity( enttable.len() );
let emptylist: Vec<&String> = Vec::new();
let mut holedup: BTreeMap<u32, u16> = BTreeMap::new();
for (id, data) in enttable.iter() {
match data {
EntData::Names( names ) => {
// The root entry always lists its names; a lock entry lists its hole
// names only when it has more than one hole, otherwise it is empty.
let idxlist = add_string( if *id == 0 { names } else { if names.len() > 1 { names } else { &emptylist } }, &mut ctx );
let location = add_str_index( &idxlist, &mut ctx );
entbytelist.push( (*id, location, idxlist.len().try_into().unwrap()) );
},
EntData::Cmds( cmds ) => {
// The low 16 bits of the id (the lock number) are recorded for each written address.
let armbytecode = assemble_cheat( cmds, &mut holedup, (id & 0xffff).try_into().unwrap(), &order );
let location = add_command( &armbytecode, &mut ctx );
entbytelist.push( (*id, location, armbytecode.len().try_into().unwrap()) );
},
}
}
// XOR of the seed with every command word.
let mut xv = code;
for cmd in ctx.cmdtable.iter() {
xv = xv ^ cmd;
}
let bin = pack( entbytelist, &ctx.strtable, &ctx.idxtable, &ctx.cmdtable );
return (xv, bin);
}
/// Rounds `n` up to the next multiple of `base`; values that already are a
/// multiple are returned unchanged.
#[inline(always)]
fn align( n: usize, base: usize ) -> usize {
    match n % base {
        0 => n,
        rem => n + base - rem,
    }
}
/// Serializes the entry list and the three tables into one little-endian image.
///
/// Layout: a `u16` entry count, then one 12-byte record per entry
/// (`id`, absolute location, count), then the string table, the `u16` index
/// table and the `u32` command table. The image is zero-padded to a multiple
/// of 32 bytes.
///
/// The location stored for an entry is its relative location plus the start
/// of the command table when the id is above 0xffff, otherwise plus the start
/// of the index table.
fn pack( entlist: Vec<(u32,u32,u32)>, strlist: &Vec<u8>, idxlist: &Vec<u16>, cmdlist: &Vec<u32> ) -> Module {
    const HEADER_SIZE: usize = 2;
    const ENTRY_SIZE: usize = 12;

    let str_start = HEADER_SIZE + entlist.len() * ENTRY_SIZE;
    let idx_start = str_start + strlist.len();
    let cmd_start = idx_start + idxlist.len() * 2; // two bytes per u16
    let payload_end = cmd_start + cmdlist.len() * 4; // four bytes per u32

    let mut image: Module = vec![0u8; align(payload_end, 32)];
    LittleEndian::write_u16(&mut image[..HEADER_SIZE], entlist.len().try_into().unwrap());

    // Entry records.
    let records = image[HEADER_SIZE..str_start].chunks_exact_mut(ENTRY_SIZE);
    for (record, entry) in records.zip(entlist.iter()) {
        let (id, location, count) = *entry;
        let section_start = if id > 0xffff { cmd_start } else { idx_start };
        let section_base: u32 = section_start.try_into().unwrap();
        LittleEndian::write_u32(&mut record[0..4], id);
        LittleEndian::write_u32(&mut record[4..8], section_base + location);
        LittleEndian::write_u32(&mut record[8..12], count);
    }

    // String table, index table, command table.
    image[str_start..idx_start].copy_from_slice(strlist);
    LittleEndian::write_u16_into(idxlist, &mut image[idx_start..cmd_start]);
    LittleEndian::write_u32_into(cmdlist, &mut image[cmd_start..payload_end]);
    image
}
// Feeds one character (given as its encoded bytes `ch`) into the parser state
// machine held in `ctx`. Dispatch is on the first byte only; the reaction
// depends on the current `ctx.state`. Unexpected input is reported through
// `error`, which panics.
fn incr( ch: &[u8], ctx: &mut ParserCtx ) {
match ch[0] {
// '[' opens a lock header.
b'[' => {
match ctx.state {
ParserState::WaitLock | ParserState::ReadHole | ParserState::NeedPart => {
ctx.state = ParserState::ReadLock;
// Store the lock that was being filled before starting a new one.
if ctx.currlock.is_some() {
let currlock = ctx.currlock.take().unwrap();
ctx.locks.push( currlock );
}
let tmp = Lock {
name: "".to_string(),
holes: Vec::new(),
};
ctx.currlock = Some( tmp );
},
ParserState::WaitPart => {},
_ => error( ctx, format!("error occur [ on {:?}", ctx.state) ),
}
},
// ']' closes a lock header.
b']' => {
match ctx.state {
ParserState::ReadLock => {
ctx.state = ParserState::WaitHole;
let name = token_str(ctx);
// A "gameinfo" header (any letter case) ends the current group: the locks
// collected so far are compiled into one module and the parser is reset.
if name.to_lowercase() == "gameinfo" {
ctx.output.push( lade(&ctx.locks, ctx.code, &ctx.file) );
ctx.state = ParserState::WaitPart;
ctx.locks = Vec::new();
ctx.currhole = None;
ctx.currlock = None;
}
else if !name.is_empty() {
ctx.currlock = Some( Lock {
name: name.to_string(),
holes: Vec::new(),
});
ctx.currhole = None;
}
else {
panic!("empty lock name {}", ctx.file);
}
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => error( ctx, format!("error occur ] on {:?}", ctx.state) ),
}
},
// End of line.
b'\r' | b'\n' => {
match ctx.state {
ParserState::WaitHole => {
ctx.state = ParserState::ReadHole;
},
ParserState::ReadKey => {
// Finish the pending token and the current hole.
let token = token_str(ctx);
let mut pass = false;
if let Some(hole) = &mut ctx.currhole {
let (name, ref mut keys) = hole;
if !token.is_empty() {
if let Some(key) = keys.last_mut() {
key.push( token );
}
}
// Holes listed in PASSHOLE are not stored in the lock.
if PASSHOLE.iter().any( |&s| s == name.to_lowercase() ) {
pass = true;
}
}
if pass == false {
if let Some(lock) = &mut ctx.currlock {
lock.holes.push( ctx.currhole.take().unwrap() );
}
}
ctx.state = ParserState::ReadHole;
},
ParserState::ReadLock => {
error(ctx, format!("error occur newline on {:?}", ctx.state));
},
ParserState::ReadHole => {
// Only spaces and tabs may be pending when a line ends without '='.
if ctx.token.len() > 0 && ctx.token.iter().any( |c| *c != b' ' && *c != b'\t' ) {
error(ctx, format!("error occur newline on {:?}", ctx.state));
}
else {
ctx.token.clear();
}
},
ParserState::WaitPart => {
ctx.state = ParserState::NeedPart;
},
_ => {},
}
// Only '\n' advances the line counter.
if ch[0] == b'\n' {
ctx.line += 1;
}
},
// '=' separates a hole name from its keys.
b'=' => {
match ctx.state {
ParserState::ReadHole => {
ctx.state = ParserState::ReadKey;
// The pending token becomes the hole name; start with one empty key.
ctx.currhole = Some( (token_str(ctx), vec![Vec::new()]) );
},
ParserState::ReadLock => {
ctx.token.extend( ch );
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => error( ctx, format!("error occur = on {:?}", ctx.state) ),
}
},
// ',' ends a token within the current key.
b',' => {
match ctx.state {
ParserState::ReadKey => {
if ctx.token.len() > 0 {
let token = token_str(ctx);
ctx.currhole.as_mut().map(
|hole| {
let (_name, ref mut keys) = hole;
if !token.is_empty() {
if let Some(key) = keys.last_mut() {
key.push( token );
}
}
true
}
);
}
},
ParserState::ReadHole => {
// Case where the cheat data continues on a new line: the last stored hole is
// taken back out of the lock and the pending token is appended to its last key.
let token = token_str(ctx);
ctx.currlock.as_mut().map(
|lock| {
ctx.currhole = lock.holes.pop().map(
|hole| {
let (name, mut keys) = hole;
if !token.is_empty() {
if let Some(key) = keys.last_mut() {
key.push( token );
}
}
(name, keys)
}
);
}
);
ctx.state = ParserState::ReadKey;
}
ParserState::ReadLock => {
ctx.token.extend( ch );
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => error( ctx, format!("error occur , on {:?}", ctx.state) ),
}
},
// ';' ends the current key and starts a new one.
b';' => {
match ctx.state {
ParserState::ReadKey => {
let token = token_str(ctx);
ctx.currhole.as_mut().map(
|hole| {
let (_name, ref mut keys) = hole;
if !token.is_empty() {
if let Some(key) = keys.last_mut() {
key.push( token );
}
}
keys.push( Vec::new() );
true
}
);
},
ParserState::ReadLock => {
ctx.token.extend( ch );
},
ParserState::ReadHole => {
// Continuation line, as for ',' above, but a new empty key is started afterwards.
let token = token_str(ctx);
ctx.currlock.as_mut().map(
|lock| {
ctx.currhole = lock.holes.pop().map(
|hole| {
let (name, mut keys) = hole;
if !token.is_empty() {
if let Some(key) = keys.last_mut() {
key.push( token );
}
}
keys.push( Vec::new() );
(name, keys)
}
);
}
);
ctx.state = ParserState::ReadKey;
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => error(ctx, format!("error occur ; on {:?}", ctx.state)),
}
},
// Spaces are kept inside lock and hole names and ignored elsewhere.
b' ' => {
match ctx.state {
ParserState::ReadLock | ParserState::ReadHole => {
ctx.token.extend( ch );
},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => {},
}
},
// Any other character.
_ => {
match ctx.state {
ParserState::ReadLock | ParserState::ReadHole => {
ctx.token.extend( ch );
},
ParserState::ReadKey => {
// Hex digits are always accepted; other characters are kept only when the
// current hole is named exactly "text" and are otherwise dropped. An error
// is raised only when there is no current hole.
if HEX.contains(&ch[0]) == true {
ctx.token.extend( ch );
}
else if let Some(hole) = &ctx.currhole {
let (name, _key) = hole;
if name == "text" {
ctx.token.extend( ch );
}
}
else {
error(ctx, format!("error occur {:?} on {:?}", ch, ctx.state));
}
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => error(ctx, format!("error occur {:?} on {:?}", ch, ctx.state)),
}
},
}
}
fn done(ctx: &mut ParserCtx) {
match ctx.state {
ParserState::WaitPart | ParserState::NeedPart => {},
_ => error( ctx, format!("error occur eof on {:?}", ctx.state) ),
}
}
/// Parses the raw bytes of one cheat file and returns the modules it produced.
///
/// `serial` is folded byte by byte into a `u32` (each step shifts the previous
/// value left by eight bits and ORs in the next byte) that is passed on as the
/// parser's `code`. `order` identifies the file in error messages.
fn parse( data: Vec<u8>, serial: &String, order: &String ) -> Vec< (u32, Module) > {
    let seed: u32 = serial
        .bytes()
        .fold(0u32, |acc, byte| u32::from(byte) | (acc << 8));
    let mut modules: Vec<(u32, Module)> = Vec::new();
    {
        let mut context = ParserCtx {
            state: ParserState::WaitLock,
            token: Vec::new(),
            line: 1,
            file: order,
            code: seed,
            locks: Vec::new(),
            currlock: None,
            currhole: None,
            output: &mut modules,
        };
        for ch in Utf8CharIter::new(&data) {
            incr( ch, &mut context );
        }
        done( &mut context );
    }
    modules
}
fn loadlist() -> Vec<(String,String)> {
let mut retval: Vec<(String, String)> = Vec::new();
let file_content = fs::read_to_string("./serial.json")
.expect("Unable to read file");
let json_data: Value = serde_json::from_str(&file_content)
.expect("JSON was not well-formatted");
if let Value::Object(map) = json_data {
retval.reverse( map.len() );
for (key, value) in map.iter() {
if let Value::String(serial) = value {
if serial != "????" {
retval.push( (key.to_string(), serial.to_string()) );
}
}
}
} else {
panic!("serial.json with invalid format.");
}
return retval;
}
/// Loads and parses the cheat file of every `(order, serial)` pair.
///
/// For each pair the file `./gba/<order>.u8` is read and parsed. A missing,
/// unreadable or empty file yields an empty module list for that serial.
/// The result has one element per input pair, in the same order.
///
/// Panics if the existence check itself fails.
fn transform<'a>( list: &'a Vec<(String, String)> ) -> Vec<(&'a String, Vec<(u32, Module)>)> {
    list.iter()
        .map(|(order, serial)| {
            let path = format!("./gba/{}.u8", order);
            let cheats = if fs::exists(&path).expect("error for exists") {
                let raw = fs::read(path).unwrap_or_default();
                if raw.is_empty() {
                    Vec::new()
                } else {
                    parse( raw, serial, order )
                }
            } else {
                Vec::new()
            };
            (serial, cheats)
        })
        .collect()
}
/// Serializes all parsed cheats into the final output file image.
///
/// Layout written below: the bytes `A`, `C`, `L`, `1`; a `u16` number of
/// serial prefixes; a `u16` `maxl`; the 3-byte serial prefixes; a `u16` table
/// of running cheat counts; a table of `u32` pairs (one pair per module); and,
/// starting at a 32-byte aligned position, the module images back to back.
fn format<'a>( mut cheats: Vec<(&'a String, Vec<(u32, Module)>)> ) -> Vec<u8> {
cheats.sort_by( |a, b| a.0.cmp(&b.0) );
// First pass: group games by the first three bytes of their serial.
//   sers - the distinct 3-byte prefixes, concatenated
//   offs - number of modules preceding each prefix group, plus the final total
//   chtc - total number of modules
//   maxl - largest number of modules in any one prefix group
let (sers, offs, chtc, maxl) = {
let mut ret: (Vec<u8>, Vec<u16>, usize, usize) = (vec![], vec![], 0, 0);
let mut r4: &str = "";
let mut last: usize = 0;
for game in cheats.iter() {
let (serial, cheat) = game;
let val = serial.get(0..3).expect("not valid serial");
if r4.ne(val) {
// A new prefix starts: close the previous group and update the maximum.
if ret.1.len() > 0 && ret.2 - last > ret.3 {
ret.3 = ret.2 - last;
}
ret.0.extend( val.as_bytes() );
ret.1.push( ret.2.try_into().unwrap() );
last = ret.2;
r4 = val;
}
ret.2 = ret.2 + cheat.len();
}
if ret.1.len() > 0 {
// Close the last group and append the terminating total.
if ret.2 - last > ret.3 {
ret.3 = ret.2 - last;
}
ret.1.push( ret.2.try_into().unwrap() );
}
ret
};
// Second pass: build the per-module table and concatenate the module images.
// The third tuple field is the position at which the images will start in
// the output (same value as `expandbase` below).
let (cheats, expanded, _) = {
let mut ret: (Vec<u32>, Vec<u8>, usize) = (vec![], vec![], align(8 + sers.len() + offs.len()*2 + chtc * 8, 32));
for game in cheats.iter_mut() {
let (serial, cheat) = game;
// Fourth character of the serial.
let val = serial.chars().nth(3).expect("invalid serial");
cheat.sort_by( |a, b| a.0.cmp(&b.0) );
for (id, bin) in cheat.iter() {
// Absolute position of this module in the output.
let off: u32 = (ret.2 + ret.1.len()).try_into().unwrap();
// First word: fourth serial character OR-ed with the position shifted left by 3.
ret.0.push( (val as u32) | (off << 3) );
ret.0.push( *id );
ret.1.extend( bin );
}
}
ret
};
println!("name: {} cheats: {} maxl: {}", sers.len(), chtc, maxl);
let serialbase = 8;
let offsetbase = serialbase + sers.len();
let cheatbase = offsetbase + offs.len() * 2;
let expandbase = align( cheatbase + cheats.len() * 4, 32 );
let total = expandbase + expanded.len();
let mut output: Vec<u8> = vec![0u8; total];
// Header: magic, prefix count, maxl.
output[0..4].copy_from_slice(&['A' as u8, 'C' as u8, 'L' as u8, 1]);
LittleEndian::write_u16(&mut output[4..6], (sers.len() / 3).try_into().unwrap());
LittleEndian::write_u16(&mut output[6..8], maxl.try_into().unwrap());
output[serialbase..offsetbase].copy_from_slice( &sers );
LittleEndian::write_u16_into(&offs, &mut output[offsetbase..cheatbase]);
LittleEndian::write_u32_into(&cheats, &mut output[cheatbase..cheatbase+cheats.len()*4]);
output[expandbase..expandbase+expanded.len()].copy_from_slice( &expanded );
return output;
}
/// Entry point: loads the serial list, parses every cheat file, merges the
/// modules of games that share a serial, and writes the result to `gba.acl`.
///
/// Exits with status 1 when the output file cannot be written.
fn main() {
    let list = loadlist();
    let roms = {
        let ret = transform( &list );
        println!("all rom has {}", ret.len());
        // Merge module lists per serial; games without modules are dropped.
        let mut idx: BTreeMap<&String, Vec<(u32, Module)>> = BTreeMap::new();
        for (serial, cheat) in ret.into_iter() {
            if cheat.is_empty() {
                continue;
            }
            idx.entry(serial).or_insert_with(Vec::new).extend( cheat );
        }
        idx.into_iter().collect()
    };
    let content = format( roms );
    // A failed write means no output was produced, so report it instead of
    // silently discarding the error.
    if let Err(err) = fs::write("gba.acl", content) {
        eprintln!("failed to write gba.acl: {}", err);
        std::process::exit(1);
    }
}
/*
// file: utf8view.rs
// author: ali-lingma
use std::iter::FusedIterator;
/// Defines an iterator struct that walks a Vec<u8> and yields each Unicode character according to its UTF-8 encoding
pub struct Utf8CharIter<'a> {
data: &'a [u8],
index: usize,
}
impl<'a> Utf8CharIter<'a> {
/// Creates a new iterator
pub fn new(data: &'a [u8]) -> Self {
Utf8CharIter { data, index: 0 }
}
}
impl<'a> Iterator for Utf8CharIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.data.len() {
return None;
}
let start = self.index;
let first_byte = self.data[self.index];
let end = if first_byte <= 0x7f {
start + 1
} else if first_byte <= 0xdf {
start + 2
} else if first_byte <= 0xef {
start + 3
} else {
start + 4
};
if end > self.data.len() {
return None;
}
let result = Some(&self.data[start..end]);
self.index = end;
result
}
}
impl<'a> FusedIterator for Utf8CharIter<'a> {}
*/