Export parser::parse in xpat

This commit is contained in:
luk
2026-01-01 19:04:20 +00:00
parent 40372db656
commit 99f8f23b5d
6 changed files with 313 additions and 162 deletions

99
Cargo.lock generated Normal file
View File

@@ -0,0 +1,99 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "sub_core"
version = "0.1.0"
[[package]]
name = "sub_libm"
version = "0.1.0"
dependencies = [
"libm",
]
[[package]]
name = "sub_macros"
version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sub_pe"
version = "0.1.0"
dependencies = [
"sub_core",
]
[[package]]
name = "sub_winu"
version = "0.1.0"
dependencies = [
"sub_core",
"sub_pe",
]
[[package]]
name = "sub_xpat"
version = "0.1.0"
dependencies = [
"sub_core",
"sub_macros",
]
[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "x"
version = "0.1.0"
dependencies = [
"sub_core",
"sub_libm",
"sub_macros",
"sub_pe",
"sub_winu",
"sub_xpat",
]

View File

@@ -1,35 +1,27 @@
use crate::scannable::{ChunkIter, Scannable}; use crate::scannable::{ChunkIter, Scannable};
use core::ops::{Bound, Range, RangeBounds, RangeFull};
use core::fmt::{Display, Formatter}; use core::fmt::{Display, Formatter};
use core::ops::{Bound, Range, RangeBounds, RangeFull};
const SEP: &str = " | "; const SEP: &str = " | ";
pub struct HexDump<'s, T: Scannable + ?Sized, R: RangeBounds<usize>>(pub &'s T, pub R); pub struct HexDump<'s, T: Scannable + ?Sized, R: RangeBounds<usize>>(pub &'s T, pub R);
#[allow(clippy::needless_lifetimes)] #[allow(clippy::needless_lifetimes)]
pub fn hex< pub fn hex<'a, T: Scannable + ?Sized, R: RangeBounds<usize>>(
'a,
T: Scannable + ?Sized,
R: RangeBounds<usize>
>(
data: &'a T, data: &'a T,
range:R range: R,
) -> HexDump<'a, T, R> { ) -> HexDump<'a, T, R> {
HexDump(data, range) HexDump(data, range)
} }
impl<'s, T: Scannable> HexDump<'s, T, RangeFull> { impl<'s, T: Scannable> HexDump<'s, T, RangeFull> {
pub fn new(scannable: &'s T) -> Self { pub fn new(scannable: &'s T) -> Self {
Self(scannable, ..) Self(scannable, ..)
} }
} }
impl<'s, T: Scannable + ?Sized, R: RangeBounds<usize>> Display for HexDump<'s, T, R> { impl<'s, T: Scannable + ?Sized, R: RangeBounds<usize>> Display for HexDump<'s, T, R> {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
// calculate the end and the start addresses // calculate the end and the start addresses
let (start, end) = { let (start, end) = {
let r = self.0.range(); let r = self.0.range();
@@ -37,32 +29,39 @@ impl<'s, T: Scannable + ?Sized, R: RangeBounds<usize>> Display for HexDump<'s, T
Bound::Included(i) => *i, Bound::Included(i) => *i,
Bound::Excluded(i) => i.saturating_add(1), Bound::Excluded(i) => i.saturating_add(1),
Bound::Unbounded => 0, Bound::Unbounded => 0,
}.max(r.start); }
.max(r.start);
let end = match self.1.end_bound() { let end = match self.1.end_bound() {
Bound::Included(i) => *i, Bound::Included(i) => *i,
Bound::Excluded(i) => i.saturating_sub(1), Bound::Excluded(i) => i.saturating_sub(1),
Bound::Unbounded => usize::MAX, Bound::Unbounded => usize::MAX,
}.min(r.end); }
.min(r.end);
(start, end) (start, end)
}; };
// the number of digits the address column should have // the number of digits the address column should have
let digits = if end == 0 { 4 } else { let digits = if end == 0 {
4
} else {
(end.ilog(16) as usize + 1).max(4) (end.ilog(16) as usize + 1).max(4)
}; };
for (mut addr, chunk) in ChunkIter::new(self.0, start) { for (mut addr, chunk) in ChunkIter::new(self.0, start) {
for chunk in chunk.chunks(16) { for chunk in chunk.chunks(16) {
if addr > end { return Ok(()) } if addr > end {
return Ok(());
}
let chunk = &chunk[..(1 + end - addr).min(chunk.len())]; let chunk = &chunk[..(1 + end - addr).min(chunk.len())];
//╶───╴Column╶────────────────────────────────╴ //╶───╴Column╶────────────────────────────────╴
write!(f, "{:0digits$X}{SEP}", addr, digits = digits)?; write!(f, "{:0digits$X}{SEP}", addr, digits = digits)?;
//╶───╴Bytes╶─────────────────────────────────╴ //╶───╴Bytes╶─────────────────────────────────╴
for (i, byte) in chunk.iter().enumerate() { for (i, byte) in chunk.iter().enumerate() {
if i != 0 { write!(f, " ")?; } if i != 0 {
write!(f, " ")?;
}
write!(f, "{byte:02X}")?; write!(f, "{byte:02X}")?;
} }
for i in (chunk.len()..16) { for i in (chunk.len()..16) {

View File

@@ -1,12 +1,13 @@
#![no_std] #![allow(unused)] #![no_std]
#![allow(unused)]
pub mod atoms { pub mod atoms {
include!("atoms.rs"); include!("atoms.rs");
} }
pub mod hexdump;
pub mod scannable; pub mod scannable;
pub mod scanner; pub mod scanner;
pub mod hexdump;
#[cfg(feature = "alloc")] #[cfg(feature = "alloc")]
extern crate alloc; extern crate alloc;
@@ -19,19 +20,19 @@ pub mod parser;
// //
pub mod prelude { pub mod prelude {
pub use sub_macros::pattern;
pub use crate::atoms::Pattern; pub use crate::atoms::Pattern;
pub use crate::scanner::Scanner;
pub use crate::hexdump::hex; pub use crate::hexdump::hex;
pub use crate::scanner::Scanner;
pub use sub_macros::pattern;
} }
pub mod public { pub mod public {
pub use crate::atoms::Atom; pub use crate::atoms::Atom;
pub use crate::scannable::Scannable;
pub use crate::scanner::{
exec, scan_for_aob, make_aob
};
pub use crate::scannable::ChunkIter;
pub use crate::hexdump::HexDump; pub use crate::hexdump::HexDump;
} pub use crate::scannable::ChunkIter;
pub use crate::scannable::Scannable;
pub use crate::scanner::{exec, make_aob, scan_for_aob};
#[cfg(feature = "alloc")]
pub use crate::parser::parse;
}

View File

@@ -1,6 +1,6 @@
use core::{cmp, fmt, mem, str};
use super::atoms::Atom; use super::atoms::Atom;
use alloc::vec::Vec; use alloc::vec::Vec;
use core::{cmp, fmt, mem, str};
/// Special skip value to indicate to use platform pointer size instead. /// Special skip value to indicate to use platform pointer size instead.
pub(crate) const PTR_SKIP: u8 = 0; pub(crate) const PTR_SKIP: u8 = 0;
@@ -14,7 +14,12 @@ pub struct ParsePatError {
impl fmt::Display for ParsePatError { impl fmt::Display for ParsePatError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str()) write!(
f,
"Syntax Error @{}: {}.",
self.position,
self.kind.to_str()
)
} }
} }
@@ -34,7 +39,7 @@ pub enum PatError {
ReadOperand, ReadOperand,
SubPattern, SubPattern,
SubOverflow, SubOverflow,
DoubleNibble DoubleNibble,
} }
impl PatError { impl PatError {
pub fn to_str(self) -> &'static str { pub fn to_str(self) -> &'static str {
@@ -163,7 +168,7 @@ pub fn parse(pat: &str) -> Result<Vec<Atom>, ParsePatError> {
Err(kind) => { Err(kind) => {
let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize; let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize;
Err(ParsePatError { kind, position }) Err(ParsePatError { kind, position })
}, }
} }
} }
// This is preferable but currently limited by macro rules... // This is preferable but currently limited by macro rules...
@@ -201,7 +206,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
_ => return Err(PatError::StackInvalid), _ => return Err(PatError::StackInvalid),
}; };
result.push(atom); result.push(atom);
}, }
// End recursive operator // End recursive operator
b'}' => { b'}' => {
// Unbalanced recursion // Unbalanced recursion
@@ -210,7 +215,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
depth -= 1; depth -= 1;
result.push(Atom::Pop); result.push(Atom::Pop);
}, }
// Start subpattern // Start subpattern
b'(' => { b'(' => {
subs.push(SubPattern::default()); subs.push(SubPattern::default());
@@ -221,7 +226,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
// Add a new case, update the case offset later // Add a new case, update the case offset later
sub.case = result.len(); sub.case = result.len();
result.push(Atom::Case(0)); result.push(Atom::Case(0));
}, }
// Case subpattern // Case subpattern
b'|' => { b'|' => {
// Should already have started a subpattern // Should already have started a subpattern
@@ -241,7 +246,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
result[sub.case] = Atom::Case(case_offset as u8); result[sub.case] = Atom::Case(case_offset as u8);
sub.case = result.len(); sub.case = result.len();
result.push(Atom::Case(0)); result.push(Atom::Case(0));
}, }
// End subpattern // End subpattern
b')' => { b')' => {
// Should already have started a subpattern // Should already have started a subpattern
@@ -259,7 +264,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
result[brk] = Atom::Break(brk_offset as u8); result[brk] = Atom::Break(brk_offset as u8);
} }
}, }
// Skip many operator // Skip many operator
b'[' => { b'[' => {
// Parse the lower bound // Parse the lower bound
@@ -275,7 +280,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
if lower_bound >= 16384 { if lower_bound >= 16384 {
return Err(PatError::ManyOverflow); return Err(PatError::ManyOverflow);
} }
}, }
_ => return Err(PatError::ManyInvalid), _ => return Err(PatError::ManyInvalid),
} }
} }
@@ -304,7 +309,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
if upper_bound >= 16384 { if upper_bound >= 16384 {
return Err(PatError::ManyOverflow); return Err(PatError::ManyOverflow);
} }
}, }
_ => return Err(PatError::ManyInvalid), _ => return Err(PatError::ManyInvalid),
} }
} }
@@ -315,51 +320,63 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
result.push(Atom::Rangext((many_skip >> 8) as u8)); result.push(Atom::Rangext((many_skip >> 8) as u8));
} }
result.push(Atom::Many((many_skip & 0xff) as u8)); result.push(Atom::Many((many_skip & 0xff) as u8));
} } else {
else {
return Err(PatError::ManyRange); return Err(PatError::ManyRange);
} }
}, }
// Match a byte // Match a byte
b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => { b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' | b'.' => {
let mut mask = 0xFF; let mut mask = 0xFF;
// High nibble of the byte // High nibble of the byte
let hi = if chr == b'.' { mask &= 0x0F;0 } let hi = if chr == b'.' {
else if chr >= b'a' { chr - b'a' + 10 } mask &= 0x0F;
else if chr >= b'A' { chr - b'A' + 10 } 0
else { chr - b'0' }; } else if chr >= b'a' {
chr - b'a' + 10
} else if chr >= b'A' {
chr - b'A' + 10
} else {
chr - b'0'
};
chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?; chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
// Low nibble of the byte // Low nibble of the byte
let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 } let lo = if chr >= b'a' && chr <= b'f' {
else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 } chr - b'a' + 10
else if chr >= b'0' && chr <= b'9' { chr - b'0' } } else if chr >= b'A' && chr <= b'F' {
else if chr == b'.' { mask &= 0xF0; 0 } chr - b'A' + 10
else { return Err(PatError::UnpairedHexDigit); }; } else if chr >= b'0' && chr <= b'9' {
chr - b'0'
} else if chr == b'.' {
mask &= 0xF0;
0
} else {
return Err(PatError::UnpairedHexDigit);
};
if mask == 0 { return Err(PatError::DoubleNibble); }; if mask == 0 {
return Err(PatError::DoubleNibble);
};
// mask out nibble // mask out nibble
if mask != 0xFF { result.push(Atom::Fuzzy(mask)) } if mask != 0xFF {
result.push(Atom::Fuzzy(mask))
}
// Add byte to the pattern // Add byte to the pattern
result.push(Atom::Byte((hi << 4) + lo)); result.push(Atom::Byte((hi << 4) + lo));
}, }
// Match raw bytes // Match raw bytes
b'"' => { b'"' => loop {
loop { if let Some(chr) = iter.next().cloned() {
if let Some(chr) = iter.next().cloned() { if chr != b'"' {
if chr != b'"' { result.push(Atom::Byte(chr));
result.push(Atom::Byte(chr)); } else {
} break;
else {
break;
}
}
else {
return Err(PatError::UnclosedQuote);
} }
} else {
return Err(PatError::UnclosedQuote);
} }
}, },
// Save the cursor // Save the cursor
@@ -370,7 +387,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
result.push(Atom::Save(save)); result.push(Atom::Save(save));
save += 1; save += 1;
}, }
// Skip bytes // Skip bytes
b'?' => { b'?' => {
// match result.last_mut() { // match result.last_mut() {
@@ -385,26 +402,26 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
} }
result.push(Atom::Skip(1)); result.push(Atom::Skip(1));
}, }
b'=' => { b'=' => {
let op = iter.next().cloned().ok_or(PatError::CheckOperand)?; let op = iter.next().cloned().ok_or(PatError::CheckOperand)?;
result.push( match op { result.push(match op {
b'0'..=b'9' => Atom::Check(op - b'0'), b'0'..=b'9' => Atom::Check(op - b'0'),
b'A'..=b'Z' => Atom::Check(10 + (op - b'A')), b'A'..=b'Z' => Atom::Check(10 + (op - b'A')),
b'a'..=b'z' => Atom::Check(10 + (op - b'a')), b'a'..=b'z' => Atom::Check(10 + (op - b'a')),
_ => return Err(PatError::CheckOperand) _ => return Err(PatError::CheckOperand),
}); });
}, }
b'@' => { b'@' => {
let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?; let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
result.push( match op { result.push(match op {
b'0'..=b'9' => Atom::Aligned(op - b'0'), b'0'..=b'9' => Atom::Aligned(op - b'0'),
b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')), b'A'..=b'Z' => Atom::Aligned(10 + (op - b'A')),
b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')), b'a'..=b'z' => Atom::Aligned(10 + (op - b'a')),
_ => return Err(PatError::AlignedOperand) _ => return Err(PatError::AlignedOperand),
}); });
}, }
b'i' => { b'i' => {
let atom = match iter.next().cloned() { let atom = match iter.next().cloned() {
Some(b'1') => Atom::ReadI8(save), Some(b'1') => Atom::ReadI8(save),
@@ -417,7 +434,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
save += 1; save += 1;
result.push(atom); result.push(atom);
}, }
b'u' => { b'u' => {
let atom = match iter.next().cloned() { let atom = match iter.next().cloned() {
Some(b'1') => Atom::ReadU8(save), Some(b'1') => Atom::ReadU8(save),
@@ -430,22 +447,21 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
save += 1; save += 1;
result.push(atom); result.push(atom);
}, }
b'z' => { b'z' => {
if save >= u8::MAX { if save >= u8::MAX {
return Err(PatError::SaveOverflow); return Err(PatError::SaveOverflow);
} }
result.push(Atom::Zero(save)); result.push(Atom::Zero(save));
save += 1; save += 1;
}, }
// Allow spaces as padding // Allow spaces as padding
b' ' | b'\n' | b'\r' | b'\t' => {}, b' ' | b'\n' | b'\r' | b'\t' => {}
// Everything else is illegal // Everything else is illegal
_ => { _ => {
return Err(PatError::UnknownChar); return Err(PatError::UnknownChar);
}, }
} }
// Converted from str originally, should be safe // Converted from str originally, should be safe
*pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) }; *pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
@@ -462,10 +478,7 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
// Remove redundant atoms at the end // Remove redundant atoms at the end
fn is_redundant(atom: &Atom) -> bool { fn is_redundant(atom: &Atom) -> bool {
match atom { match atom {
| Atom::Skip(_) Atom::Skip(_) | Atom::Rangext(_) | Atom::Pop | Atom::Many(_) => true,
| Atom::Rangext(_)
| Atom::Pop
| Atom::Many(_) => true,
_ => false, _ => false,
} }
} }
@@ -474,4 +487,4 @@ fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError>
} }
Ok(()) Ok(())
} }

View File

@@ -9,33 +9,40 @@ pub trait Scannable {
/// given an address will return the next chunk, None if there are no more chunks /// given an address will return the next chunk, None if there are no more chunks
fn next_chunk(&self, address: usize) -> Option<(usize, &[u8])>; fn next_chunk(&self, address: usize) -> Option<(usize, &[u8])>;
} }
impl Scannable for [u8] { impl Scannable for [u8] {
fn range(&self) -> Range<usize> { 0..self.len() } fn range(&self) -> Range<usize> {
0..self.len()
}
fn chunk_at(&self, address: usize) -> Option<&[u8]> { fn chunk_at(&self, address: usize) -> Option<&[u8]> {
self.get(address..) self.get(address..)
} }
fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None } fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> {
None
}
} }
/// In case you want to scan with a specific address /// In case you want to scan with a specific address
impl Scannable for (usize, &[u8]) { impl Scannable for (usize, &[u8]) {
fn range(&self) -> Range<usize> { self.0..(self.0 + self.1.len()) } fn range(&self) -> Range<usize> {
self.0..(self.0 + self.1.len())
}
fn chunk_at(&self, address: usize) -> Option<&[u8]> { fn chunk_at(&self, address: usize) -> Option<&[u8]> {
match address.overflowing_sub(self.0) { match address.overflowing_sub(self.0) {
(address, false) => self.1.get(address..), (address, false) => self.1.get(address..),
(_, true) => None, (_, true) => None,
} }
} }
fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> { None } fn next_chunk(&self, _address: usize) -> Option<(usize, &[u8])> {
None
}
} }
pub struct ChunkIter<'l, T: Scannable + ?Sized>(&'l T, usize, bool); pub struct ChunkIter<'l, T: Scannable + ?Sized>(&'l T, usize, bool);
impl<'l, T: Scannable + ?Sized> ChunkIter<'l, T> { impl<'l, T: Scannable + ?Sized> ChunkIter<'l, T> {
pub const fn new(scannable : &'l T, start: usize) -> Self { pub const fn new(scannable: &'l T, start: usize) -> Self {
Self(scannable, start, true) Self(scannable, start, true)
} }
} }
@@ -43,15 +50,14 @@ impl<'l, T: Scannable + ?Sized> ChunkIter<'l, T> {
impl<'l, T: Scannable + ?Sized> Iterator for ChunkIter<'l, T> { impl<'l, T: Scannable + ?Sized> Iterator for ChunkIter<'l, T> {
type Item = (usize, &'l [u8]); type Item = (usize, &'l [u8]);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
// if this is the first time being called, use chunk_at instead of next_chunk // if this is the first time being called, use chunk_at instead of next_chunk
if self.2 { if self.2 {
self.2 = false; self.2 = false;
if let Some(chunk) = self.0.chunk_at(self.1) { if let Some(chunk) = self.0.chunk_at(self.1) {
return Some((self.1, chunk)) return Some((self.1, chunk));
} }
} }
match self.0.next_chunk(self.1) { match self.0.next_chunk(self.1) {
None => None, None => None,
Some((addr, block)) => { Some((addr, block)) => {
@@ -59,6 +65,5 @@ impl<'l, T: Scannable + ?Sized> Iterator for ChunkIter<'l, T> {
Some((addr, block)) Some((addr, block))
} }
} }
} }
} }

View File

@@ -1,7 +1,7 @@
use core::ops::{Range, RangeBounds, Bound}; use crate::atoms::{Atom, Pattern};
use crate::atoms::{Pattern, Atom};
use crate::scannable::Scannable; use crate::scannable::Scannable;
use sub_core::{pod::Pod}; use core::ops::{Bound, Range, RangeBounds};
use sub_core::pod::Pod;
const SKIP_VA: u32 = size_of::<usize>() as u32; const SKIP_VA: u32 = size_of::<usize>() as u32;
@@ -20,18 +20,22 @@ pub struct Scanner<'a, S: Scannable + ?Sized> {
} }
impl<'a, S: Scannable + ?Sized> Scanner<'a, S> { impl<'a, S: Scannable + ?Sized> Scanner<'a, S> {
pub fn new(bin: &'a S, pat: Pattern<'a>, r: impl RangeBounds<usize>) -> Self { pub fn new(bin: &'a S, pat: Pattern<'a>, r: impl RangeBounds<usize>) -> Self {
let range = limit_range(bin, r); let range = limit_range(bin, r);
let cursor = range.start; let cursor = range.start;
Self { bin, pat, range, cursor } Self {
bin,
pat,
range,
cursor,
}
} }
pub fn get<const LEN: usize>(&mut self) -> Option<[usize;LEN]> { pub fn get<const LEN: usize>(&mut self) -> Option<[usize; LEN]> {
let mut m = [0usize;LEN]; let mut m = [0usize; LEN];
self.next(&mut m).then_some(m) self.next(&mut m).then_some(m)
} }
pub fn next(&mut self, saves: &mut [usize]) -> bool { pub fn next(&mut self, saves: &mut [usize]) -> bool {
let mut aob = <[u8; 0x10] as Pod>::uninit(); let mut aob = <[u8; 0x10] as Pod>::uninit();
let aob = make_aob(self.pat, &mut aob); let aob = make_aob(self.pat, &mut aob);
@@ -51,7 +55,15 @@ impl<'a, S: Scannable + ?Sized> Scanner<'a, S> {
while self.range.contains(&self.cursor) { while self.range.contains(&self.cursor) {
let current_cursor = self.cursor; let current_cursor = self.cursor;
self.cursor += 1; self.cursor += 1;
if exec(self.bin, current_cursor, self.pat, saves, self.range.clone()).is_some() { if exec(
self.bin,
current_cursor,
self.pat,
saves,
self.range.clone(),
)
.is_some()
{
return true; return true;
} }
} }
@@ -69,7 +81,6 @@ pub fn exec<Binary: Scannable + ?Sized>(
saves: &mut [usize], saves: &mut [usize],
range: Range<usize>, range: Range<usize>,
) -> Option<usize> { ) -> Option<usize> {
let mut cursor = address; let mut cursor = address;
let mut pc = 0; let mut pc = 0;
@@ -89,11 +100,14 @@ pub fn exec<Binary: Scannable + ?Sized>(
while let Some(atom) = pattern.get(pc).cloned() { while let Some(atom) = pattern.get(pc).cloned() {
pc += 1; pc += 1;
match atom { match atom {
// Compare bytes // Compare bytes
Atom::Byte(pat_byte) => { Atom::Byte(pat_byte) => {
let Some(byte) = read::<_, u8>(bin, cursor) else { return None; }; let Some(byte) = read::<_, u8>(bin, cursor) else {
if byte & mask != pat_byte & mask { return None; } return None;
};
if byte & mask != pat_byte & mask {
return None;
}
cursor += 1; cursor += 1;
mask = 0xFF; mask = 0xFF;
} }
@@ -111,7 +125,7 @@ pub fn exec<Binary: Scannable + ?Sized>(
// start running the pattern from pc... // start running the pattern from pc...
exec(bin, cursor, &pattern[pc..], saves, range.clone())?; exec(bin, cursor, &pattern[pc..], saves, range.clone())?;
cursor = cursor.wrapping_add(skip as usize); cursor = cursor.wrapping_add(skip as usize);
mask = 0xff; mask = 0xff;
ext_range = 0; ext_range = 0;
@@ -124,7 +138,7 @@ pub fn exec<Binary: Scannable + ?Sized>(
Some(Atom::Push(_)) => counter += 1, Some(Atom::Push(_)) => counter += 1,
Some(Atom::Pop) => counter -= 1, Some(Atom::Pop) => counter -= 1,
None => return Some(cursor), None => return Some(cursor),
_ => (/**/) _ => (),
} }
pc += 1; pc += 1;
} }
@@ -162,29 +176,39 @@ pub fn exec<Binary: Scannable + ?Sized>(
} }
Atom::Jump1 => { Atom::Jump1 => {
let Some(sbyte) = read::<_, i8>(bin, cursor) else { return None }; let Some(sbyte) = read::<_, i8>(bin, cursor) else {
return None;
};
cursor = cursor.wrapping_add(sbyte as usize).wrapping_add(1); cursor = cursor.wrapping_add(sbyte as usize).wrapping_add(1);
} }
Atom::Jump4 => { Atom::Jump4 => {
let Some(sdword) = read::<_, i32>(bin, cursor) else { return None }; let Some(sdword) = read::<_, i32>(bin, cursor) else {
return None;
};
cursor = cursor.wrapping_add(sdword as usize).wrapping_add(4); cursor = cursor.wrapping_add(sdword as usize).wrapping_add(4);
} }
Atom::Ptr => { Atom::Ptr => {
let Some(sptr) = read::<_, usize>(bin, cursor) else { return None }; let Some(sptr) = read::<_, usize>(bin, cursor) else {
return None;
};
cursor = sptr; cursor = sptr;
} }
Atom::Pir(slot) => { Atom::Pir(slot) => {
let Some(sdword) = read::<_, i32>(bin, cursor) else { return None }; let Some(sdword) = read::<_, i32>(bin, cursor) else {
return None;
};
let base = saves.get(slot as usize).cloned().unwrap_or(cursor); let base = saves.get(slot as usize).cloned().unwrap_or(cursor);
cursor = base.wrapping_add(sdword as usize); cursor = base.wrapping_add(sdword as usize);
} }
Atom::Check(slot) => { Atom::Check(slot) => {
if let Some(&rva) = saves.get(slot as usize) { if let Some(&rva) = saves.get(slot as usize) {
if rva != cursor { return None; } if rva != cursor {
return None;
}
} }
} }
@@ -195,33 +219,57 @@ pub fn exec<Binary: Scannable + ?Sized>(
} }
Atom::ReadU8(slot) => { Atom::ReadU8(slot) => {
let Some(value) = read::<_, u8>(bin, cursor) else { return None }; let Some(value) = read::<_, u8>(bin, cursor) else {
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } return None;
};
if let Some(slot) = saves.get_mut(slot as usize) {
*slot = value as _
}
cursor = cursor.wrapping_add(1) cursor = cursor.wrapping_add(1)
} }
Atom::ReadI8(slot) => { Atom::ReadI8(slot) => {
let Some(value) = read::<_, i8>(bin, cursor) else { return None }; let Some(value) = read::<_, i8>(bin, cursor) else {
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } return None;
};
if let Some(slot) = saves.get_mut(slot as usize) {
*slot = value as _
}
cursor = cursor.wrapping_add(1) cursor = cursor.wrapping_add(1)
} }
Atom::ReadU16(slot) => { Atom::ReadU16(slot) => {
let Some(value) = read::<_, u16>(bin, cursor) else { return None }; let Some(value) = read::<_, u16>(bin, cursor) else {
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } return None;
};
if let Some(slot) = saves.get_mut(slot as usize) {
*slot = value as _
}
cursor = cursor.wrapping_add(2) cursor = cursor.wrapping_add(2)
} }
Atom::ReadI16(slot) => { Atom::ReadI16(slot) => {
let Some(value) = read::<_, i16>(bin, cursor) else { return None }; let Some(value) = read::<_, i16>(bin, cursor) else {
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } return None;
};
if let Some(slot) = saves.get_mut(slot as usize) {
*slot = value as _
}
cursor = cursor.wrapping_add(2) cursor = cursor.wrapping_add(2)
} }
Atom::ReadU32(slot) => { Atom::ReadU32(slot) => {
let Some(value) = read::<_, u32>(bin, cursor) else { return None }; let Some(value) = read::<_, u32>(bin, cursor) else {
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } return None;
};
if let Some(slot) = saves.get_mut(slot as usize) {
*slot = value as _
}
cursor = cursor.wrapping_add(4) cursor = cursor.wrapping_add(4)
} }
Atom::ReadI32(slot) => { Atom::ReadI32(slot) => {
let Some(value) = read::<_, i32>(bin, cursor) else { return None }; let Some(value) = read::<_, i32>(bin, cursor) else {
if let Some(slot) = saves.get_mut(slot as usize) { *slot = value as _ } return None;
};
if let Some(slot) = saves.get_mut(slot as usize) {
*slot = value as _
}
cursor = cursor.wrapping_add(4) cursor = cursor.wrapping_add(4)
} }
Atom::Zero(slot) => { Atom::Zero(slot) => {
@@ -232,7 +280,7 @@ pub fn exec<Binary: Scannable + ?Sized>(
Atom::Case(next) => { Atom::Case(next) => {
if let Some(nc) = exec(bin, cursor, &pattern[pc..], saves, range.clone()) { if let Some(nc) = exec(bin, cursor, &pattern[pc..], saves, range.clone()) {
cursor = nc; cursor = nc;
// same as Push/Pop except we add the next from the break to the pc. // same as Push/Pop except we add the next from the break to the pc.
let mut counter = 1; let mut counter = 1;
loop { loop {
@@ -246,12 +294,11 @@ pub fn exec<Binary: Scannable + ?Sized>(
} }
} }
None => return Some(cursor), None => return Some(cursor),
_ => (/**/) _ => (),
} }
pc += 1; pc += 1;
} }
// panic!("{pc}, {:X?} == {:02X}", pattern.get(pc), bin.chunk_at(cursor).unwrap([1])); // panic!("{pc}, {:X?} == {:02X}", pattern.get(pc), bin.chunk_at(cursor).unwrap([1]));
} else { } else {
// if the case fails go to the location defined by next // if the case fails go to the location defined by next
pc += next as usize; pc += next as usize;
@@ -266,9 +313,8 @@ pub fn exec<Binary: Scannable + ?Sized>(
Some(cursor) Some(cursor)
} }
#[inline(always)] #[inline(always)]
pub fn exec_many<Binary: Scannable + ?Sized >( pub fn exec_many<Binary: Scannable + ?Sized>(
bin: &Binary, bin: &Binary,
address: usize, address: usize,
pattern: Pattern, pattern: Pattern,
@@ -309,7 +355,6 @@ pub fn exec_many<Binary: Scannable + ?Sized >(
} }
} }
#[inline(always)] #[inline(always)]
pub fn scan_for_aob<Binary: Scannable + ?Sized>( pub fn scan_for_aob<Binary: Scannable + ?Sized>(
bin: &Binary, bin: &Binary,
@@ -319,16 +364,13 @@ pub fn scan_for_aob<Binary: Scannable + ?Sized>(
let mut address = range.start; let mut address = range.start;
let upper_bounds = range.end; let upper_bounds = range.end;
while address < upper_bounds { while address < upper_bounds {
// get the current chunk for the given address // get the current chunk for the given address
let chunk = match bin.chunk_at(address) { let chunk = match bin.chunk_at(address) {
Some(chunk) => chunk, Some(chunk) => chunk,
// the address is out of bounds, try to shift the address so it's back in bounds // the address is out of bounds, try to shift the address so it's back in bounds
None => match bin.next_chunk(address) { None => match bin.next_chunk(address) {
// the next chunk is in bounds so we will just correct the address and use that chunk instead // the next chunk is in bounds so we will just correct the address and use that chunk instead
Some((naddr, nchunk)) if naddr < upper_bounds => { Some((naddr, nchunk)) if naddr < upper_bounds => {
address = naddr; address = naddr;
@@ -337,33 +379,36 @@ pub fn scan_for_aob<Binary: Scannable + ?Sized>(
// no hope, give up // no hope, give up
_ => return None, _ => return None,
} },
}; };
// try to find the aob in the current chunk // try to find the aob in the current chunk
if let Some(offset) = chunk.windows(aob.len()) if let Some(offset) = chunk
.take(upper_bounds.saturating_sub(address)).position(|c| c == aob) { .windows(aob.len())
.take(upper_bounds.saturating_sub(address))
.position(|c| c == aob)
{
// we got a hit, return it // we got a hit, return it
return Some(address + offset) return Some(address + offset);
} }
// the AOB was not found in the current chunk, now check if it's contiguous between chunks: // the AOB was not found in the current chunk, now check if it's contiguous between chunks:
if let Some((naddr, nchunk)) = bin.next_chunk(address) { if let Some((naddr, nchunk)) = bin.next_chunk(address) {
// next chunk is out of bounds, give up // next chunk is out of bounds, give up
if naddr - aob.len() > upper_bounds { return None } if naddr - aob.len() > upper_bounds {
return None;
}
// if chunks are contiguous and the aob is greater than one byte, // if chunks are contiguous and the aob is greater than one byte,
// check if the aob is on a chunk border // check if the aob is on a chunk border
if address + chunk.len() == naddr && aob.len() > 1 { if address + chunk.len() == naddr && aob.len() > 1 {
// check if the aob is between two chunks :) // check if the aob is between two chunks :)
for i in 1..aob.len()-1 { for i in 1..aob.len() - 1 {
let (p1, p2) = aob.split_at(i); let (p1, p2) = aob.split_at(i);
if chunk.ends_with(p1) && nchunk.starts_with(p2) { if chunk.ends_with(p1) && nchunk.starts_with(p2) {
// aob was found between two chunks // aob was found between two chunks
// return this address // return this address
return Some(address + chunk.len() - i) return Some(address + chunk.len() - i);
} }
} }
} }
@@ -373,17 +418,18 @@ pub fn scan_for_aob<Binary: Scannable + ?Sized>(
// TODO When I originally wrote this I put a subtraction // TODO When I originally wrote this I put a subtraction
// I don't remember why but I think it should be an addition // I don't remember why but I think it should be an addition
let naddr = naddr + aob.len(); let naddr = naddr + aob.len();
debug_assert!(naddr > address, "debug assertion failed: {naddr:X} > {address:X}"); debug_assert!(
naddr > address,
"debug assertion failed: {naddr:X} > {address:X}"
);
address = naddr; address = naddr;
} else { } else {
return None return None;
} }
} }
None None
} }
/// Limits a selected range into the range of the binary... /// Limits a selected range into the range of the binary...
fn limit_range<Binary: Scannable + ?Sized>( fn limit_range<Binary: Scannable + ?Sized>(
bin: &Binary, bin: &Binary,
@@ -398,7 +444,7 @@ fn limit_range<Binary: Scannable + ?Sized>(
let end = match range.end_bound() { let end = match range.end_bound() {
Bound::Included(v) => bin_range.end.min(v.saturating_add(1)), Bound::Included(v) => bin_range.end.min(v.saturating_add(1)),
Bound::Excluded(v) => bin_range.end.min(*v), Bound::Excluded(v) => bin_range.end.min(*v),
Bound::Unbounded => bin_range.end Bound::Unbounded => bin_range.end,
}; };
start..end start..end
} }
@@ -422,15 +468,3 @@ pub fn make_aob<'b>(pattern: &[Atom], buffer: &'b mut [u8]) -> &'b [u8] {
} }
&buffer[..i] &buffer[..i]
} }