使用utf8_view进行遍历,x86已经追上了zig

This commit is contained in:
a92126 2024-11-29 18:01:07 +08:00
parent e17f8a4dd6
commit 91548c067e

View File

@ -1,8 +1,10 @@
mod utf8view;
use std::fs; use std::fs;
use serde_json::Value; use serde_json::Value;
use byteorder::{LittleEndian, ByteOrder}; use byteorder::{LittleEndian, ByteOrder};
use std::collections::HashMap;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use utf8view::Utf8CharIter;
type Module = Vec<u8>; type Module = Vec<u8>;
type LockKey = Vec<String>; type LockKey = Vec<String>;
@ -25,11 +27,11 @@ enum ParserState {
NeedPart, NeedPart,
} }
const PASSHOLE: [&str; 2] = ["text", "off"]; const PASSHOLE: [&str; 2] = ["text", "off"];
const HEX: &str = "0123456789abcdefABCDEF"; const HEX: [u8; 22] = [b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'A', b'b', b'B', b'c', b'C', b'd', b'D', b'e', b'E', b'f', b'F'];
struct ParserCtx<'a>{ struct ParserCtx<'a>{
state: ParserState, state: ParserState,
token: Vec<char>, token: Vec<u8>,
line: u32, line: u32,
file: &'a String, file: &'a String,
@ -44,10 +46,10 @@ struct ParserCtx<'a>{
fn token_str( ctx: &mut ParserCtx ) -> String { fn token_str( ctx: &mut ParserCtx ) -> String {
if ctx.token.len() == 0 { if ctx.token.len() == 0 {
"".to_string() String::from("")
} }
else { else {
let retval = String::from_iter( &ctx.token ); let retval = String::from_utf8(ctx.token.clone()).unwrap();
ctx.token.clear(); ctx.token.clear();
retval retval
} }
@ -67,11 +69,11 @@ enum EntData<'a> {
Cmds(&'a Vec<LockKey>), Cmds(&'a Vec<LockKey>),
} }
struct LadeCtx { struct LadeCtx<'a> {
strtable: Vec<u8>, strtable: Vec<u8>,
idxtable: Vec<u16>, idxtable: Vec<u16>,
cmdtable: Vec<u32>, cmdtable: Vec<u32>,
strcache: HashMap<String, u16>, strcache: BTreeMap<&'a String, u16>,
} }
#[inline(always)] #[inline(always)]
@ -107,10 +109,10 @@ fn ut_push( t: &mut Vec<u8>, tr: &[u8] ) -> u16 {
off.try_into().unwrap() off.try_into().unwrap()
} }
fn add_string( list: &Vec<&String>, ctx: &mut LadeCtx ) -> Vec<u16> { fn add_string<'a>( list: &Vec<&'a String>, ctx: &mut LadeCtx<'a> ) -> Vec<u16> {
let mut ret: Vec<u16> = Vec::with_capacity(list.len()); let mut ret: Vec<u16> = Vec::with_capacity(list.len());
for tr in list.iter() { for tr in list.iter() {
let res = ctx.strcache.get(&tr.to_string()); let res = ctx.strcache.get(tr);
if let Some(pos) = res { if let Some(pos) = res {
ret.push( *pos ); ret.push( *pos );
} }
@ -118,7 +120,7 @@ fn add_string( list: &Vec<&String>, ctx: &mut LadeCtx ) -> Vec<u16> {
let code: &[u8] = tr.as_bytes(); let code: &[u8] = tr.as_bytes();
let off: i16 = find_index(&ctx.strtable, code); let off: i16 = find_index(&ctx.strtable, code);
let res: u16 = if off < 0 { ut_push(&mut ctx.strtable, code) } else { off.try_into().unwrap() }; let res: u16 = if off < 0 { ut_push(&mut ctx.strtable, code) } else { off.try_into().unwrap() };
ctx.strcache.insert(tr.to_string(), res); ctx.strcache.insert(tr, res);
ret.push( res ); ret.push( res );
} }
} }
@ -173,6 +175,7 @@ fn assemble_cheat( list: &Vec<LockKey>, dup: &mut BTreeMap<u32, u16>, hole: u16,
let mut ordered: Vec<(u32, u32)> = addrval.iter().map(|(&a,&b)| (a,b)).collect(); let mut ordered: Vec<(u32, u32)> = addrval.iter().map(|(&a,&b)| (a,b)).collect();
ordered.sort_by( |a, b| a.0.cmp(&b.0) ); ordered.sort_by( |a, b| a.0.cmp(&b.0) );
let mut blocks: Vec<u32> = Vec::new(); let mut blocks: Vec<u32> = Vec::new();
let mut curr: (u32, u32, u32) = (0, 0, 0); let mut curr: (u32, u32, u32) = (0, 0, 0);
for (addr, value) in ordered.iter() { for (addr, value) in ordered.iter() {
@ -235,11 +238,12 @@ fn lade( list: &Vec<Lock>, code: u32, order: &String ) -> (u32, Module) {
names.sort_by( |a, b| if a.len() == b.len() { a.cmp(&b) } else { b.len().cmp( &a.len() ) } ); names.sort_by( |a, b| if a.len() == b.len() { a.cmp(&b) } else { b.len().cmp( &a.len() ) } );
let mut ctx: LadeCtx = LadeCtx{ let mut ctx: LadeCtx = LadeCtx{
strtable: Vec::with_capacity( 1024 ), strtable: Vec::new(),
idxtable: Vec::with_capacity( 128 ), idxtable: Vec::new(),
cmdtable: Vec::with_capacity( 2048 ), cmdtable: Vec::new(),
strcache: HashMap::new(), strcache: BTreeMap::new(),
}; };
add_string(&names, &mut ctx); add_string(&names, &mut ctx);
let mut entbytelist: Vec<(u32, u32, u32)> = Vec::with_capacity( enttable.len() ); let mut entbytelist: Vec<(u32, u32, u32)> = Vec::with_capacity( enttable.len() );
@ -269,6 +273,7 @@ fn lade( list: &Vec<Lock>, code: u32, order: &String ) -> (u32, Module) {
return (xv, bin); return (xv, bin);
} }
#[inline(always)]
fn align( n: usize, base: usize ) -> usize { fn align( n: usize, base: usize ) -> usize {
let ext = n % base; let ext = n % base;
if ext == 0 { n } else { n + base - ext } if ext == 0 { n } else { n + base - ext }
@ -311,9 +316,9 @@ fn pack( entlist: Vec<(u32,u32,u32)>, strlist: &Vec<u8>, idxlist: &Vec<u16>, cmd
return result; return result;
} }
fn incr( ch: char, ctx: &mut ParserCtx ) { fn incr( ch: &[u8], ctx: &mut ParserCtx ) {
match ch { match ch[0] {
'[' => { b'[' => {
match ctx.state { match ctx.state {
ParserState::WaitLock | ParserState::ReadHole | ParserState::NeedPart => { ParserState::WaitLock | ParserState::ReadHole | ParserState::NeedPart => {
ctx.state = ParserState::ReadLock; ctx.state = ParserState::ReadLock;
@ -331,7 +336,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur [ on {:?}", ctx.state) ), _ => error( ctx, format!("error occur [ on {:?}", ctx.state) ),
} }
}, },
']' => { b']' => {
match ctx.state { match ctx.state {
ParserState::ReadLock => { ParserState::ReadLock => {
ctx.state = ParserState::WaitHole; ctx.state = ParserState::WaitHole;
@ -361,7 +366,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur ] on {:?}", ctx.state) ), _ => error( ctx, format!("error occur ] on {:?}", ctx.state) ),
} }
}, },
'\r' | '\n' => { b'\r' | b'\n' => {
match ctx.state { match ctx.state {
ParserState::WaitHole => { ParserState::WaitHole => {
ctx.state = ParserState::ReadHole; ctx.state = ParserState::ReadHole;
@ -392,7 +397,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
error(ctx, format!("error occur newline on {:?}", ctx.state)); error(ctx, format!("error occur newline on {:?}", ctx.state));
}, },
ParserState::ReadHole => { ParserState::ReadHole => {
if ctx.token.len() > 0 && ctx.token.iter().any( |c| *c != ' ' && *c != '\t' ) { if ctx.token.len() > 0 && ctx.token.iter().any( |c| *c != b' ' && *c != b'\t' ) {
error(ctx, format!("error occur newline on {:?}", ctx.state)); error(ctx, format!("error occur newline on {:?}", ctx.state));
} }
else { else {
@ -404,18 +409,18 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
}, },
_ => {}, _ => {},
} }
if ch == '\n' { if ch[0] == b'\n' {
ctx.line += 1; ctx.line += 1;
} }
}, },
'=' => { b'=' => {
match ctx.state { match ctx.state {
ParserState::ReadHole => { ParserState::ReadHole => {
ctx.state = ParserState::ReadKey; ctx.state = ParserState::ReadKey;
ctx.currhole = Some( (token_str(ctx), vec![Vec::new()]) ); ctx.currhole = Some( (token_str(ctx), vec![Vec::new()]) );
}, },
ParserState::ReadLock => { ParserState::ReadLock => {
ctx.token.push( ch ); ctx.token.extend( ch );
}, },
ParserState::WaitPart => {}, ParserState::WaitPart => {},
ParserState::NeedPart => { ParserState::NeedPart => {
@ -424,7 +429,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur = on {:?}", ctx.state) ), _ => error( ctx, format!("error occur = on {:?}", ctx.state) ),
} }
}, },
',' => { b',' => {
match ctx.state { match ctx.state {
ParserState::ReadKey => { ParserState::ReadKey => {
if ctx.token.len() > 0 { if ctx.token.len() > 0 {
@ -463,7 +468,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
ctx.state = ParserState::ReadKey; ctx.state = ParserState::ReadKey;
} }
ParserState::ReadLock => { ParserState::ReadLock => {
ctx.token.push( ch ); ctx.token.extend( ch );
}, },
ParserState::WaitPart => {}, ParserState::WaitPart => {},
ParserState::NeedPart => { ParserState::NeedPart => {
@ -472,7 +477,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur , on {:?}", ctx.state) ), _ => error( ctx, format!("error occur , on {:?}", ctx.state) ),
} }
}, },
';' => { b';' => {
match ctx.state { match ctx.state {
ParserState::ReadKey => { ParserState::ReadKey => {
let token = token_str(ctx); let token = token_str(ctx);
@ -490,7 +495,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
); );
}, },
ParserState::ReadLock => { ParserState::ReadLock => {
ctx.token.push( ch ); ctx.token.extend( ch );
}, },
ParserState::ReadHole => { ParserState::ReadHole => {
let token = token_str(ctx); let token = token_str(ctx);
@ -519,10 +524,10 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error(ctx, format!("error occur ; on {:?}", ctx.state)), _ => error(ctx, format!("error occur ; on {:?}", ctx.state)),
} }
}, },
' ' => { b' ' => {
match ctx.state { match ctx.state {
ParserState::ReadLock | ParserState::ReadHole => { ParserState::ReadLock | ParserState::ReadHole => {
ctx.token.push( ch ); ctx.token.extend( ch );
}, },
ParserState::NeedPart => { ParserState::NeedPart => {
ctx.state = ParserState::WaitPart; ctx.state = ParserState::WaitPart;
@ -533,27 +538,27 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => { _ => {
match ctx.state { match ctx.state {
ParserState::ReadLock | ParserState::ReadHole => { ParserState::ReadLock | ParserState::ReadHole => {
ctx.token.push( ch ); ctx.token.extend( ch );
}, },
ParserState::ReadKey => { ParserState::ReadKey => {
if String::from(HEX).contains(ch) == true { if HEX.contains(&ch[0]) == true {
ctx.token.push( ch ); ctx.token.extend( ch );
} }
else if let Some(hole) = &ctx.currhole { else if let Some(hole) = &ctx.currhole {
let (name, _key) = hole; let (name, _key) = hole;
if name == "text" { if name == "text" {
ctx.token.push( ch ); ctx.token.extend( ch );
} }
} }
else { else {
error(ctx, format!("error occur {} on {:?}", ch, ctx.state)); error(ctx, format!("error occur {:?} on {:?}", ch, ctx.state));
} }
}, },
ParserState::WaitPart => {}, ParserState::WaitPart => {},
ParserState::NeedPart => { ParserState::NeedPart => {
ctx.state = ParserState::WaitPart; ctx.state = ParserState::WaitPart;
}, },
_ => error(ctx, format!("error occur {} on {:?}", ch, ctx.state)), _ => error(ctx, format!("error occur {:?} on {:?}", ch, ctx.state)),
} }
}, },
} }
@ -566,7 +571,7 @@ fn done(ctx: &mut ParserCtx) {
} }
} }
fn parse( data: String, serial: &String, order: &String ) -> Vec< (u32, Module) > { fn parse( data: Vec<u8>, serial: &String, order: &String ) -> Vec< (u32, Module) > {
let mut ret: Vec<(u32, Module)> = Vec::new(); let mut ret: Vec<(u32, Module)> = Vec::new();
let mut context = ParserCtx { let mut context = ParserCtx {
state: ParserState::WaitLock, state: ParserState::WaitLock,
@ -589,7 +594,9 @@ fn parse( data: String, serial: &String, order: &String ) -> Vec< (u32, Module)
output: &mut ret, output: &mut ret,
}; };
for ch in data.chars() {
let iter = Utf8CharIter::new(&data);
for ch in iter {
incr( ch, &mut context ); incr( ch, &mut context );
} }
done( &mut context ); done( &mut context );
@ -623,37 +630,43 @@ fn transform<'a>( list: &'a Vec<(String, String)> ) -> Vec<(&'a String, Vec<(u32
let mut retval: Vec<(&String, Vec<(u32, Module)>)> = Vec::new(); let mut retval: Vec<(&String, Vec<(u32, Module)>)> = Vec::new();
for (order, serial) in list.iter() { for (order, serial) in list.iter() {
let file = format!("./gba/{}.u8", order); let file = format!("./gba/{}.u8", order);
let chtdata = fs::read_to_string(file).unwrap_or("".to_string()); let hasfile = fs::exists(&file).expect("error for exists");
if hasfile == false {
retval.push( (serial, vec![]) );
}
else {
let chtdata = fs::read(file).unwrap_or( vec![] );
if chtdata.len() > 0 { if chtdata.len() > 0 {
let cheats = parse( chtdata.to_string(), serial, order ); let cheats = parse( chtdata, serial, order );
retval.push( (serial, cheats) ); retval.push( (serial, cheats) );
} }
else { else {
// println!("bad cheat: {}", file); //println!("bad cheat");
retval.push( (serial, vec![]) ); retval.push( (serial, vec![]) );
} }
} }
}
return retval; return retval;
} }
fn format<'a>( mut cheats: Vec<(&'a String, Vec<(u32, Module)>)> ) -> Vec<u8> { fn format<'a>( mut cheats: Vec<(&'a String, Vec<(u32, Module)>)> ) -> Vec<u8> {
cheats.sort_by( |a, b| a.0.cmp(&b.0) ); cheats.sort_by( |a, b| a.0.cmp(&b.0) );
println!("valid rom has {}", cheats.len());
let (sers, offs, chtc, maxl, _) = { let (sers, offs, chtc, maxl) = {
let mut ret: (Vec<u8>, Vec<u16>, usize, usize, String) = (vec![], vec![], 0, 0, "".to_string()); let mut ret: (Vec<u8>, Vec<u16>, usize, usize) = (vec![], vec![], 0, 0);
let mut r4: &str = "";
let mut last: usize = 0; let mut last: usize = 0;
for game in cheats.iter() { for game in cheats.iter() {
let (serial, cheat) = game; let (serial, cheat) = game;
let val = serial.get(0..3).expect("not valid serial"); let val = serial.get(0..3).expect("not valid serial");
if ret.4.ne(val) { if r4.ne(val) {
if ret.1.len() > 0 && ret.2 - last > ret.3 { if ret.1.len() > 0 && ret.2 - last > ret.3 {
ret.3 = ret.2 - last; ret.3 = ret.2 - last;
} }
ret.0.extend( val.as_bytes() ); ret.0.extend( val.as_bytes() );
ret.1.push( ret.2.try_into().unwrap() ); ret.1.push( ret.2.try_into().unwrap() );
last = ret.2; last = ret.2;
ret.4 = val.to_string(); r4 = val;
} }
ret.2 = ret.2 + cheat.len(); ret.2 = ret.2 + cheat.len();
} }
@ -667,9 +680,10 @@ fn format<'a>( mut cheats: Vec<(&'a String, Vec<(u32, Module)>)> ) -> Vec<u8> {
}; };
let (cheats, expanded, _) = { let (cheats, expanded, _) = {
let mut ret: (Vec<u32>, Vec<u8>, usize) = (vec![], vec![], align(8 + sers.len() + offs.len()*2 + chtc * 8, 32)); let mut ret: (Vec<u32>, Vec<u8>, usize) = (vec![], vec![], align(8 + sers.len() + offs.len()*2 + chtc * 8, 32));
for game in cheats.iter() { for game in cheats.iter_mut() {
let (serial, cheat) = game; let (serial, cheat) = game;
let val = serial.chars().nth(3).expect("invalid serial"); let val = serial.chars().nth(3).expect("invalid serial");
cheat.sort_by( |a, b| a.0.cmp(&b.0) );
for (id, bin) in cheat.iter() { for (id, bin) in cheat.iter() {
let off: u32 = (ret.2 + ret.1.len()).try_into().unwrap(); let off: u32 = (ret.2 + ret.1.len()).try_into().unwrap();
ret.0.push( (val as u32) | (off << 3) ); ret.0.push( (val as u32) | (off << 3) );
@ -718,3 +732,54 @@ fn main() {
let content = format( roms ); let content = format( roms );
let _ = fs::write("gba.acl", content); let _ = fs::write("gba.acl", content);
} }
/*
// file: utf8view.rs
// author: ali-lingma
use std::iter::FusedIterator;
/// Iterator over a byte slice that yields one UTF-8 encoded character per
/// step, as a borrowed sub-slice of 1 to 4 bytes.
///
/// The input is not validated up front. For well-formed UTF-8 every item is
/// exactly the encoding of one Unicode scalar value. For malformed input the
/// iterator resynchronises instead of mis-grouping bytes:
///
/// * a stray continuation byte (`0x80..=0xBF`) or an impossible lead byte
///   (`0xF8..=0xFF`) is yielded on its own as a 1-byte item;
/// * a multi-byte sequence is cut short at the first byte that is not a
///   continuation byte, so a following ASCII delimiter is never swallowed;
/// * a sequence truncated by the end of the input is yielded as-is rather
///   than silently dropped.
///
/// Consequently every input byte appears in exactly one item, in order, but
/// an item is only guaranteed to be valid UTF-8 when the input was.
pub struct Utf8CharIter<'a> {
    data: &'a [u8],
    index: usize,
}

impl<'a> Utf8CharIter<'a> {
    /// Creates an iterator positioned at the start of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Utf8CharIter { data, index: 0 }
    }
}

impl<'a> Iterator for Utf8CharIter<'a> {
    type Item = &'a [u8];

    /// Returns the next character-sized chunk, or `None` once every byte has
    /// been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the shared reference out so the returned slice borrows from
        // the underlying data for `'a`, not from `&mut self`.
        let data: &'a [u8] = self.data;
        let rest = data.get(self.index..)?;
        let (&lead, tail) = rest.split_first()?;

        // Sequence length announced by the lead byte. ASCII, stray
        // continuation bytes and invalid lead bytes all stand alone.
        let expected = match lead {
            0xc0..=0xdf => 2,
            0xe0..=0xef => 3,
            0xf0..=0xf7 => 4,
            _ => 1,
        };

        // Only accept genuine continuation bytes (bit pattern 10xxxxxx), and
        // never read past the end of the input.
        let continuation = tail
            .iter()
            .take(expected - 1)
            .take_while(|&&byte| byte & 0xc0 == 0x80)
            .count();

        let (chunk, _) = rest.split_at(1 + continuation);
        self.index += chunk.len();
        Some(chunk)
    }

    /// Each item covers between 1 and 4 of the remaining bytes.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.data.len() - self.index;
        ((remaining + 3) / 4, Some(remaining))
    }
}

// Once `index` reaches `data.len()` it never changes again, so `next` keeps
// returning `None`.
impl<'a> FusedIterator for Utf8CharIter<'a> {}
*/