使用utf8_view进行遍历,x86已经追上了zig

This commit is contained in:
a92126 2024-11-29 18:01:07 +08:00
parent e17f8a4dd6
commit 91548c067e

View File

@ -1,8 +1,10 @@
mod utf8view;
use std::fs;
use serde_json::Value;
use byteorder::{LittleEndian, ByteOrder};
use std::collections::HashMap;
use std::collections::BTreeMap;
use utf8view::Utf8CharIter;
type Module = Vec<u8>;
type LockKey = Vec<String>;
@ -25,11 +27,11 @@ enum ParserState {
NeedPart,
}
const PASSHOLE: [&str; 2] = ["text", "off"];
const HEX: &str = "0123456789abcdefABCDEF";
const HEX: [u8; 22] = [b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'A', b'b', b'B', b'c', b'C', b'd', b'D', b'e', b'E', b'f', b'F'];
struct ParserCtx<'a>{
state: ParserState,
token: Vec<char>,
token: Vec<u8>,
line: u32,
file: &'a String,
@ -44,10 +46,10 @@ struct ParserCtx<'a>{
fn token_str( ctx: &mut ParserCtx ) -> String {
if ctx.token.len() == 0 {
"".to_string()
String::from("")
}
else {
let retval = String::from_iter( &ctx.token );
let retval = String::from_utf8(ctx.token.clone()).unwrap();
ctx.token.clear();
retval
}
@ -67,11 +69,11 @@ enum EntData<'a> {
Cmds(&'a Vec<LockKey>),
}
struct LadeCtx {
struct LadeCtx<'a> {
strtable: Vec<u8>,
idxtable: Vec<u16>,
cmdtable: Vec<u32>,
strcache: HashMap<String, u16>,
strcache: BTreeMap<&'a String, u16>,
}
#[inline(always)]
@ -107,10 +109,10 @@ fn ut_push( t: &mut Vec<u8>, tr: &[u8] ) -> u16 {
off.try_into().unwrap()
}
fn add_string( list: &Vec<&String>, ctx: &mut LadeCtx ) -> Vec<u16> {
fn add_string<'a>( list: &Vec<&'a String>, ctx: &mut LadeCtx<'a> ) -> Vec<u16> {
let mut ret: Vec<u16> = Vec::with_capacity(list.len());
for tr in list.iter() {
let res = ctx.strcache.get(&tr.to_string());
let res = ctx.strcache.get(tr);
if let Some(pos) = res {
ret.push( *pos );
}
@ -118,7 +120,7 @@ fn add_string( list: &Vec<&String>, ctx: &mut LadeCtx ) -> Vec<u16> {
let code: &[u8] = tr.as_bytes();
let off: i16 = find_index(&ctx.strtable, code);
let res: u16 = if off < 0 { ut_push(&mut ctx.strtable, code) } else { off.try_into().unwrap() };
ctx.strcache.insert(tr.to_string(), res);
ctx.strcache.insert(tr, res);
ret.push( res );
}
}
@ -173,6 +175,7 @@ fn assemble_cheat( list: &Vec<LockKey>, dup: &mut BTreeMap<u32, u16>, hole: u16,
let mut ordered: Vec<(u32, u32)> = addrval.iter().map(|(&a,&b)| (a,b)).collect();
ordered.sort_by( |a, b| a.0.cmp(&b.0) );
let mut blocks: Vec<u32> = Vec::new();
let mut curr: (u32, u32, u32) = (0, 0, 0);
for (addr, value) in ordered.iter() {
@ -235,11 +238,12 @@ fn lade( list: &Vec<Lock>, code: u32, order: &String ) -> (u32, Module) {
names.sort_by( |a, b| if a.len() == b.len() { a.cmp(&b) } else { b.len().cmp( &a.len() ) } );
let mut ctx: LadeCtx = LadeCtx{
strtable: Vec::with_capacity( 1024 ),
idxtable: Vec::with_capacity( 128 ),
cmdtable: Vec::with_capacity( 2048 ),
strcache: HashMap::new(),
strtable: Vec::new(),
idxtable: Vec::new(),
cmdtable: Vec::new(),
strcache: BTreeMap::new(),
};
add_string(&names, &mut ctx);
let mut entbytelist: Vec<(u32, u32, u32)> = Vec::with_capacity( enttable.len() );
@ -269,6 +273,7 @@ fn lade( list: &Vec<Lock>, code: u32, order: &String ) -> (u32, Module) {
return (xv, bin);
}
#[inline(always)]
fn align( n: usize, base: usize ) -> usize {
let ext = n % base;
if ext == 0 { n } else { n + base - ext }
@ -311,9 +316,9 @@ fn pack( entlist: Vec<(u32,u32,u32)>, strlist: &Vec<u8>, idxlist: &Vec<u16>, cmd
return result;
}
fn incr( ch: char, ctx: &mut ParserCtx ) {
match ch {
'[' => {
fn incr( ch: &[u8], ctx: &mut ParserCtx ) {
match ch[0] {
b'[' => {
match ctx.state {
ParserState::WaitLock | ParserState::ReadHole | ParserState::NeedPart => {
ctx.state = ParserState::ReadLock;
@ -331,7 +336,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur [ on {:?}", ctx.state) ),
}
},
']' => {
b']' => {
match ctx.state {
ParserState::ReadLock => {
ctx.state = ParserState::WaitHole;
@ -361,7 +366,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur ] on {:?}", ctx.state) ),
}
},
'\r' | '\n' => {
b'\r' | b'\n' => {
match ctx.state {
ParserState::WaitHole => {
ctx.state = ParserState::ReadHole;
@ -392,7 +397,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
error(ctx, format!("error occur newline on {:?}", ctx.state));
},
ParserState::ReadHole => {
if ctx.token.len() > 0 && ctx.token.iter().any( |c| *c != ' ' && *c != '\t' ) {
if ctx.token.len() > 0 && ctx.token.iter().any( |c| *c != b' ' && *c != b'\t' ) {
error(ctx, format!("error occur newline on {:?}", ctx.state));
}
else {
@ -404,18 +409,18 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
},
_ => {},
}
if ch == '\n' {
if ch[0] == b'\n' {
ctx.line += 1;
}
},
'=' => {
b'=' => {
match ctx.state {
ParserState::ReadHole => {
ctx.state = ParserState::ReadKey;
ctx.currhole = Some( (token_str(ctx), vec![Vec::new()]) );
},
ParserState::ReadLock => {
ctx.token.push( ch );
ctx.token.extend( ch );
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
@ -424,7 +429,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur = on {:?}", ctx.state) ),
}
},
',' => {
b',' => {
match ctx.state {
ParserState::ReadKey => {
if ctx.token.len() > 0 {
@ -463,7 +468,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
ctx.state = ParserState::ReadKey;
}
ParserState::ReadLock => {
ctx.token.push( ch );
ctx.token.extend( ch );
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
@ -472,7 +477,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error( ctx, format!("error occur , on {:?}", ctx.state) ),
}
},
';' => {
b';' => {
match ctx.state {
ParserState::ReadKey => {
let token = token_str(ctx);
@ -490,7 +495,7 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
);
},
ParserState::ReadLock => {
ctx.token.push( ch );
ctx.token.extend( ch );
},
ParserState::ReadHole => {
let token = token_str(ctx);
@ -519,10 +524,10 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => error(ctx, format!("error occur ; on {:?}", ctx.state)),
}
},
' ' => {
b' ' => {
match ctx.state {
ParserState::ReadLock | ParserState::ReadHole => {
ctx.token.push( ch );
ctx.token.extend( ch );
},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
@ -533,27 +538,27 @@ fn incr( ch: char, ctx: &mut ParserCtx ) {
_ => {
match ctx.state {
ParserState::ReadLock | ParserState::ReadHole => {
ctx.token.push( ch );
ctx.token.extend( ch );
},
ParserState::ReadKey => {
if String::from(HEX).contains(ch) == true {
ctx.token.push( ch );
if HEX.contains(&ch[0]) == true {
ctx.token.extend( ch );
}
else if let Some(hole) = &ctx.currhole {
let (name, _key) = hole;
if name == "text" {
ctx.token.push( ch );
ctx.token.extend( ch );
}
}
else {
error(ctx, format!("error occur {} on {:?}", ch, ctx.state));
error(ctx, format!("error occur {:?} on {:?}", ch, ctx.state));
}
},
ParserState::WaitPart => {},
ParserState::NeedPart => {
ctx.state = ParserState::WaitPart;
},
_ => error(ctx, format!("error occur {} on {:?}", ch, ctx.state)),
_ => error(ctx, format!("error occur {:?} on {:?}", ch, ctx.state)),
}
},
}
@ -566,7 +571,7 @@ fn done(ctx: &mut ParserCtx) {
}
}
fn parse( data: String, serial: &String, order: &String ) -> Vec< (u32, Module) > {
fn parse( data: Vec<u8>, serial: &String, order: &String ) -> Vec< (u32, Module) > {
let mut ret: Vec<(u32, Module)> = Vec::new();
let mut context = ParserCtx {
state: ParserState::WaitLock,
@ -589,7 +594,9 @@ fn parse( data: String, serial: &String, order: &String ) -> Vec< (u32, Module)
output: &mut ret,
};
for ch in data.chars() {
let iter = Utf8CharIter::new(&data);
for ch in iter {
incr( ch, &mut context );
}
done( &mut context );
@ -623,37 +630,43 @@ fn transform<'a>( list: &'a Vec<(String, String)> ) -> Vec<(&'a String, Vec<(u32
let mut retval: Vec<(&String, Vec<(u32, Module)>)> = Vec::new();
for (order, serial) in list.iter() {
let file = format!("./gba/{}.u8", order);
let chtdata = fs::read_to_string(file).unwrap_or("".to_string());
let hasfile = fs::exists(&file).expect("error for exists");
if hasfile == false {
retval.push( (serial, vec![]) );
}
else {
let chtdata = fs::read(file).unwrap_or( vec![] );
if chtdata.len() > 0 {
let cheats = parse( chtdata.to_string(), serial, order );
let cheats = parse( chtdata, serial, order );
retval.push( (serial, cheats) );
}
else {
// println!("bad cheat: {}", file);
//println!("bad cheat");
retval.push( (serial, vec![]) );
}
}
}
return retval;
}
fn format<'a>( mut cheats: Vec<(&'a String, Vec<(u32, Module)>)> ) -> Vec<u8> {
cheats.sort_by( |a, b| a.0.cmp(&b.0) );
println!("valid rom has {}", cheats.len());
let (sers, offs, chtc, maxl, _) = {
let mut ret: (Vec<u8>, Vec<u16>, usize, usize, String) = (vec![], vec![], 0, 0, "".to_string());
let (sers, offs, chtc, maxl) = {
let mut ret: (Vec<u8>, Vec<u16>, usize, usize) = (vec![], vec![], 0, 0);
let mut r4: &str = "";
let mut last: usize = 0;
for game in cheats.iter() {
let (serial, cheat) = game;
let val = serial.get(0..3).expect("not valid serial");
if ret.4.ne(val) {
if r4.ne(val) {
if ret.1.len() > 0 && ret.2 - last > ret.3 {
ret.3 = ret.2 - last;
}
ret.0.extend( val.as_bytes() );
ret.1.push( ret.2.try_into().unwrap() );
last = ret.2;
ret.4 = val.to_string();
r4 = val;
}
ret.2 = ret.2 + cheat.len();
}
@ -667,9 +680,10 @@ fn format<'a>( mut cheats: Vec<(&'a String, Vec<(u32, Module)>)> ) -> Vec<u8> {
};
let (cheats, expanded, _) = {
let mut ret: (Vec<u32>, Vec<u8>, usize) = (vec![], vec![], align(8 + sers.len() + offs.len()*2 + chtc * 8, 32));
for game in cheats.iter() {
for game in cheats.iter_mut() {
let (serial, cheat) = game;
let val = serial.chars().nth(3).expect("invalid serial");
cheat.sort_by( |a, b| a.0.cmp(&b.0) );
for (id, bin) in cheat.iter() {
let off: u32 = (ret.2 + ret.1.len()).try_into().unwrap();
ret.0.push( (val as u32) | (off << 3) );
@ -718,3 +732,54 @@ fn main() {
let content = format( roms );
let _ = fs::write("gba.acl", content);
}
/*
// file: utf8view.rs
// author: ali-lingma
use std::iter::FusedIterator;
/// Iterator that walks a byte buffer and yields one sub-slice per UTF-8
/// encoded character, without decoding or allocating.
///
/// The buffer is NOT validated. For well-formed UTF-8 every item is exactly
/// the encoding of one character. For malformed input the iterator stays
/// lossless: every input byte is yielded exactly once and in order, and an
/// item never extends past the bytes that really belong to its lead byte.
pub struct Utf8CharIter<'a> {
    /// The whole buffer being iterated.
    data: &'a [u8],
    /// Offset of the next byte that has not been yielded yet
    /// (always `<= data.len()`).
    index: usize,
}

impl<'a> Utf8CharIter<'a> {
    /// Creates an iterator positioned at the start of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        Utf8CharIter { data, index: 0 }
    }
}

impl<'a> Iterator for Utf8CharIter<'a> {
    /// The bytes of one character; never empty.
    type Item = &'a [u8];

    /// Returns the next character's bytes, or `None` once the buffer is
    /// exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the shared reference out so the returned slice borrows the
        // buffer for 'a rather than `self`.
        let data: &'a [u8] = self.data;
        let rest = data.get(self.index..)?;
        let (&lead, tail) = rest.split_first()?;

        // Sequence length announced by the lead byte. Anything that is not a
        // multi-byte lead (ASCII, a stray continuation byte 0x80..=0xBF, or
        // the invalid 0xF8..=0xFF range) is treated as a one-byte item so it
        // cannot swallow the bytes that follow it.
        let expected = match lead {
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF7 => 4,
            _ => 1,
        };

        // Only take continuation bytes (0b10xx_xxxx) that are really there.
        // This keeps ASCII delimiters intact after a broken lead byte and
        // yields a truncated trailing sequence instead of silently dropping it.
        let continuation = tail
            .iter()
            .take(expected - 1)
            .take_while(|&&byte| byte & 0xC0 == 0x80)
            .count();

        let len = 1 + continuation;
        self.index += len;
        Some(&rest[..len])
    }

    /// Every item is between one and four bytes long, which bounds the
    /// number of remaining items from both sides.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.data.len() - self.index;
        ((remaining + 3) / 4, Some(remaining))
    }
}

// Once the buffer is exhausted `index == data.len()` forever, so `next`
// keeps returning `None`.
impl<'a> FusedIterator for Utf8CharIter<'a> {}
*/