rakata_formats/
binary.rs

1//! Shared binary helpers and lightweight codec traits.
2//!
3//! This module is the low-level utility layer used across format modules for:
//! - checked primitive reads (`u8`, `u16`, `u32`, `u64`, `i32`, `f32`, fourcc),
5//! - bounds validation for offset/size pairs,
6//! - narrow encode/decode traits used for composition.
7//!
8//! ## Layering Overview
9//! ```text
10//! format module (erf/rim/gff/tlk/twoda)
11//!   -> format-specific error mapping
12//!   -> binary::{read_*, write_*, check_*}
13//!   -> raw byte slice / writer
14//! ```
15
16use std::io::Write;
17use thiserror::Error;
18
19use rakata_core::{encode_text, EncodeTextError, TextEncoding};
20
/// Shared binary layout/read error used by low-level format helpers.
///
/// Produced by the checked readers and bounds validators in this module.
/// Every variant carries a short label naming the field, primitive, or region
/// that was being processed when the failure occurred.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum BinaryLayoutError {
    /// Integer/index arithmetic overflow while computing offsets or sizes.
    ///
    /// Raised when `offset + N` does not fit in `usize` during a primitive
    /// read, or when a `u32` value cannot be converted to `usize`.
    #[error("{0} overflow")]
    Overflow(&'static str),
    /// Requested read exceeds available input bytes.
    ///
    /// Raised when the byte window for a primitive read is not fully
    /// contained in the input slice.
    #[error("unexpected EOF while reading {0}")]
    UnexpectedEof(&'static str),
    /// Named region exceeds the allowed bounds.
    ///
    /// Raised by the range validators; the payload is the caller-supplied
    /// region label.
    #[error("{0} exceeds file bounds")]
    BoundsExceeded(String),
}
34
/// Minimal decode trait for binary format value types.
///
/// Implementors build `Self` from a raw byte slice and report failures
/// through their own associated error type.
pub trait DecodeBinary: Sized {
    /// Error type used by the format.
    type Error;

    /// Decodes a value from raw bytes.
    ///
    /// # Errors
    /// Returns `Self::Error` when `bytes` cannot be decoded; the exact
    /// conditions are defined by each implementor.
    fn decode_binary(bytes: &[u8]) -> Result<Self, Self::Error>;
}
43
/// Minimal encode trait for binary format value types.
///
/// Implementors serialize `self` into a freshly allocated byte buffer and
/// report failures through their own associated error type.
pub trait EncodeBinary {
    /// Error type used by the format.
    type Error;

    /// Encodes a value into an owned byte buffer.
    ///
    /// # Errors
    /// Returns `Self::Error` when the value cannot be encoded; the exact
    /// conditions are defined by each implementor.
    fn encode_binary(&self) -> Result<Vec<u8>, Self::Error>;
}
52
53/// Validates that `[offset, offset + size)` is within `total_len`.
54pub fn check_range_in_bounds(
55    total_len: usize,
56    offset: usize,
57    size: usize,
58    label: &str,
59) -> Result<(), BinaryLayoutError> {
60    if offset.checked_add(size).is_none_or(|end| end > total_len) {
61        return Err(BinaryLayoutError::BoundsExceeded(label.to_string()));
62    }
63    Ok(())
64}
65
66/// Validates that `[offset, offset + size)` is within `bytes`.
67pub fn check_slice_in_bounds(
68    bytes: &[u8],
69    offset: usize,
70    size: usize,
71    label: &str,
72) -> Result<(), BinaryLayoutError> {
73    check_range_in_bounds(bytes.len(), offset, size, label)
74}
75
76/// Converts a 32-bit offset/count value to `usize` with overflow checking.
77pub fn checked_to_usize(value: u32, field: &'static str) -> Result<usize, BinaryLayoutError> {
78    usize::try_from(value).map_err(|_| BinaryLayoutError::Overflow(field))
79}
80
81/// Reads a 4-byte tag at `offset`.
82pub fn read_fourcc(bytes: &[u8], offset: usize) -> Result<[u8; 4], BinaryLayoutError> {
83    read_array::<4>(bytes, offset, "fourcc")
84}
85
/// Checks that a parsed fourcc is exactly `expected`.
///
/// On mismatch the offending tag is handed back as `Err(actual)`, letting
/// callers build a format-specific error without re-reading the source bytes.
pub fn expect_fourcc(actual: [u8; 4], expected: [u8; 4]) -> Result<(), [u8; 4]> {
    if actual != expected {
        return Err(actual);
    }
    Ok(())
}
97
/// Checks that a parsed fourcc equals at least one entry of `expected`.
///
/// Yields `Err(actual)` when none of the candidates match (including when
/// `expected` is empty).
pub fn expect_any_fourcc(actual: [u8; 4], expected: &[[u8; 4]]) -> Result<(), [u8; 4]> {
    expected.contains(&actual).then_some(()).ok_or(actual)
}
108
109/// Reads little-endian `u16` at `offset`.
110pub fn read_u16(bytes: &[u8], offset: usize) -> Result<u16, BinaryLayoutError> {
111    Ok(u16::from_le_bytes(read_array::<2>(bytes, offset, "u16")?))
112}
113
114/// Reads a single byte at `offset`.
115pub fn read_u8(bytes: &[u8], offset: usize) -> Result<u8, BinaryLayoutError> {
116    read_array::<1>(bytes, offset, "u8").map(|[b]| b)
117}
118
119/// Reads little-endian `u32` at `offset`.
120pub fn read_u32(bytes: &[u8], offset: usize) -> Result<u32, BinaryLayoutError> {
121    Ok(u32::from_le_bytes(read_array::<4>(bytes, offset, "u32")?))
122}
123
124/// Reads little-endian `u64` at `offset`.
125pub fn read_u64(bytes: &[u8], offset: usize) -> Result<u64, BinaryLayoutError> {
126    Ok(u64::from_le_bytes(read_array::<8>(bytes, offset, "u64")?))
127}
128
129/// Reads little-endian `i32` at `offset`.
130pub fn read_i32(bytes: &[u8], offset: usize) -> Result<i32, BinaryLayoutError> {
131    Ok(i32::from_le_bytes(read_array::<4>(bytes, offset, "i32")?))
132}
133
134/// Reads little-endian `f32` at `offset`.
135pub fn read_f32(bytes: &[u8], offset: usize) -> Result<f32, BinaryLayoutError> {
136    Ok(f32::from_le_bytes(read_array::<4>(bytes, offset, "f32")?))
137}
138
/// Emits one byte to `writer`.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_u8<W: Write>(writer: &mut W, value: u8) -> std::io::Result<()> {
    writer.write_all(std::slice::from_ref(&value))
}
143
/// Emits `value` to `writer` as two little-endian bytes.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_u16<W: Write>(writer: &mut W, value: u16) -> std::io::Result<()> {
    let le_bytes = value.to_le_bytes();
    writer.write_all(&le_bytes)
}
148
/// Emits `value` to `writer` as four little-endian bytes.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
    let le_bytes = value.to_le_bytes();
    writer.write_all(&le_bytes)
}
153
/// Emits `value` to `writer` as four little-endian bytes (two's complement).
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_i32<W: Write>(writer: &mut W, value: i32) -> std::io::Result<()> {
    let le_bytes = value.to_le_bytes();
    writer.write_all(&le_bytes)
}
158
/// Emits `value` to `writer` as eight little-endian bytes.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
    let le_bytes = value.to_le_bytes();
    writer.write_all(&le_bytes)
}
163
/// Emits `value` to `writer` as its IEEE-754 bit pattern in four
/// little-endian bytes.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_f32<W: Write>(writer: &mut W, value: f32) -> std::io::Result<()> {
    let le_bytes = value.to_bits().to_le_bytes();
    writer.write_all(&le_bytes)
}
168
/// Emits the four bytes of `tag` to `writer` unchanged.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_fourcc<W: Write>(writer: &mut W, tag: [u8; 4]) -> std::io::Result<()> {
    writer.write_all(tag.as_slice())
}
173
174/// Reads a null-terminated string from a fixed-size field in a byte buffer.
175///
176/// Scans up to `max_len` bytes starting at `offset` for the first null byte,
177/// then decodes the preceding bytes as Windows-1252 (the engine's native
178/// codepage). If no null byte is found, the entire `max_len` slice is decoded.
179///
180/// This is the standard binary format string primitive used across KotOR
181/// formats (model names, texture names, resource labels, etc.).
182pub fn read_fixed_c_string(bytes: &[u8], offset: usize, max_len: usize) -> String {
183    let end = (offset + max_len).min(bytes.len());
184    let slice = &bytes[offset..end];
185    let nul_pos = slice.iter().position(|&b| b == 0).unwrap_or(slice.len());
186    rakata_core::text::decode_text(&slice[..nul_pos], TextEncoding::Windows1252)
187}
188
189/// Reads a variable-length null-terminated string from a byte buffer.
190///
191/// Scans from `offset` to the first null byte (or end of buffer), then
192/// decodes the preceding bytes as Windows-1252.
193pub fn read_c_string(bytes: &[u8], offset: usize) -> String {
194    let end = bytes[offset..]
195        .iter()
196        .position(|&b| b == 0)
197        .unwrap_or(bytes.len() - offset);
198    rakata_core::text::decode_text(&bytes[offset..offset + end], TextEncoding::Windows1252)
199}
200
/// Writes a null-terminated string into a fixed-size field, zero-padded.
///
/// Exactly `field_size` bytes are written. The raw bytes of `s` are emitted
/// as-is (no codepage conversion) and truncated to `field_size - 1` bytes to
/// guarantee a null terminator; the remainder of the field is filled with
/// zeros. Truncation is byte-based, so a multi-byte UTF-8 sequence in `s` may
/// be cut in the middle. When `field_size` is 0 nothing is written.
///
/// # Errors
/// Propagates any I/O error reported by the writer.
pub fn write_fixed_c_string<W: Write>(
    writer: &mut W,
    s: &str,
    field_size: usize,
) -> std::io::Result<()> {
    // Padding is emitted in chunks from this buffer rather than one
    // `write_all` call per byte, which is costly on unbuffered writers.
    const ZEROS: [u8; 64] = [0; 64];

    let payload = s.as_bytes();
    let payload_len = payload.len().min(field_size.saturating_sub(1));
    writer.write_all(&payload[..payload_len])?;

    // `payload_len <= field_size` by construction, so this cannot underflow.
    let mut remaining = field_size - payload_len;
    while remaining > 0 {
        let chunk = remaining.min(ZEROS.len());
        writer.write_all(&ZEROS[..chunk])?;
        remaining -= chunk;
    }
    Ok(())
}
219
220/// Encodes `text` as Windows-1252 and writes it to `writer`.
221///
222/// Format modules can pass a format-specific mapper for text-encoding failures,
223/// while I/O errors are converted via `From<std::io::Error>`.
224pub fn write_cp1252<W: Write, E, F>(
225    writer: &mut W,
226    text: &str,
227    context: String,
228    map_text_error: F,
229) -> Result<(), E>
230where
231    E: From<std::io::Error>,
232    F: FnOnce(String, EncodeTextError) -> E,
233{
234    let encoded = encode_text(text, TextEncoding::Windows1252)
235        .map_err(|source| map_text_error(context, source))?;
236    writer.write_all(&encoded).map_err(E::from)
237}
238
/// Reports whether an entry's resource key equals the queried key.
///
/// Two keys match when their types are equal and their resource names are
/// equal ignoring ASCII case.
pub fn matches_resource_key<T: Eq>(
    entry_resref: &str,
    entry_type: T,
    query_resref: &str,
    query_type: T,
) -> bool {
    // Compare types first; names are only compared when the types agree.
    if entry_type != query_type {
        return false;
    }
    entry_resref.eq_ignore_ascii_case(query_resref)
}
249
250fn read_array<const N: usize>(
251    bytes: &[u8],
252    offset: usize,
253    context: &'static str,
254) -> Result<[u8; N], BinaryLayoutError> {
255    let end = offset
256        .checked_add(N)
257        .ok_or(BinaryLayoutError::Overflow(context))?;
258    let raw = bytes
259        .get(offset..end)
260        .ok_or(BinaryLayoutError::UnexpectedEof(context))?;
261    let mut out = [0_u8; N];
262    out.copy_from_slice(raw);
263    Ok(out)
264}
265
/// Unit tests for the binary helpers.
///
/// Covers primitive reads/writes, fourcc validation, bounds checks,
/// fixed-size string writes, CP-1252 writes, and resource-key matching.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn reads_primitives_from_little_endian_bytes() {
        let bytes = [
            b'R', b'I', b'M', b' ', // fourcc
            0x34, 0x12, // u16
            0x78, 0x56, 0x34, 0x12, // u32
            0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, // u64
            0x00, 0x00, 0x80, 0x3f, // f32 = 1.0
        ];

        assert_eq!(read_fourcc(&bytes, 0).expect("fourcc"), *b"RIM ");
        assert_eq!(read_u16(&bytes, 4).expect("u16"), 0x1234);
        assert_eq!(read_u32(&bytes, 6).expect("u32"), 0x1234_5678);
        assert_eq!(read_u64(&bytes, 10).expect("u64"), 0x0102_0304_0506_0708);
        assert_eq!(read_f32(&bytes, 18).expect("f32"), 1.0);
    }

    #[test]
    fn reads_single_bytes_and_signed_integers() {
        let bytes = [
            0x7f, // u8
            0xfe, 0xff, 0xff, 0xff, // i32 = -2
        ];

        assert_eq!(read_u8(&bytes, 0).expect("u8"), 0x7f);
        assert_eq!(read_i32(&bytes, 1).expect("i32"), -2);

        // Reading one past the end must report EOF rather than panic.
        let eof = read_u8(&bytes, 5).expect_err("expected EOF");
        assert!(matches!(eof, BinaryLayoutError::UnexpectedEof("u8")));
    }

    #[test]
    fn validates_single_or_multiple_fourcc_values() {
        assert_eq!(expect_fourcc(*b"RIM ", *b"RIM "), Ok(()));
        assert_eq!(expect_fourcc(*b"RIM ", *b"GFF "), Err(*b"RIM "));

        assert_eq!(expect_any_fourcc(*b"V1.1", &[*b"V1  ", *b"V1.1"]), Ok(()));
        assert_eq!(
            expect_any_fourcc(*b"V9.9", &[*b"V1  ", *b"V1.1"]),
            Err(*b"V9.9")
        );
    }

    #[test]
    fn reports_eof_and_overflow_for_invalid_reads() {
        let bytes = [0_u8; 4];

        let eof = read_u32(&bytes, 2).expect_err("expected EOF");
        assert!(matches!(eof, BinaryLayoutError::UnexpectedEof("u32")));

        let overflow = read_u32(&bytes, usize::MAX).expect_err("expected overflow");
        assert!(matches!(overflow, BinaryLayoutError::Overflow("u32")));
    }

    #[test]
    fn validates_slice_bounds() {
        let bytes = [0_u8; 16];

        check_slice_in_bounds(&bytes, 4, 8, "table").expect("in bounds");

        let err = check_slice_in_bounds(&bytes, 12, 8, "table").expect_err("must fail");
        assert_eq!(err, BinaryLayoutError::BoundsExceeded("table".into()));
    }

    #[test]
    fn rejects_ranges_whose_end_overflows() {
        // `offset + size` overflowing `usize` must be reported, not wrapped.
        let err = check_range_in_bounds(16, usize::MAX, 1, "table").expect_err("must fail");
        assert_eq!(err, BinaryLayoutError::BoundsExceeded("table".into()));

        // An empty range that ends exactly at the buffer end is valid.
        check_range_in_bounds(16, 16, 0, "empty tail").expect("empty range at end");
    }

    #[test]
    fn converts_u32_to_usize_with_error() {
        assert_eq!(checked_to_usize(42, "count").expect("convert"), 42usize);
    }

    #[test]
    fn writes_little_endian_primitives() {
        let mut out = Vec::new();
        write_u8(&mut out, 0xAB).expect("write u8");
        write_u16(&mut out, 0x1234).expect("write u16");
        write_u32(&mut out, 0x1234_5678).expect("write u32");
        write_u64(&mut out, 0x0102_0304_0506_0708).expect("write u64");
        write_f32(&mut out, 1.0_f32).expect("write f32");
        write_fourcc(&mut out, *b"RIM ").expect("write fourcc");
        assert_eq!(
            out,
            vec![
                0xAB, // u8
                0x34, 0x12, // u16
                0x78, 0x56, 0x34, 0x12, // u32
                0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, // u64
                0x00, 0x00, 0x80, 0x3f, // f32 = 1.0
                b'R', b'I', b'M', b' ', // fourcc
            ]
        );
    }

    #[test]
    fn writes_signed_i32_as_twos_complement() {
        let mut out = Vec::new();
        write_i32(&mut out, -2).expect("write i32");
        assert_eq!(out, vec![0xfe, 0xff, 0xff, 0xff]);
    }

    #[test]
    fn writes_fixed_c_strings_with_padding_and_truncation() {
        let mut out = Vec::new();
        write_fixed_c_string(&mut out, "abc", 6).expect("padded");
        assert_eq!(out, b"abc\0\0\0");

        // The last byte of the field is always reserved for the terminator.
        let mut out = Vec::new();
        write_fixed_c_string(&mut out, "abcdef", 4).expect("truncated");
        assert_eq!(out, b"abc\0");

        let mut out = Vec::new();
        write_fixed_c_string(&mut out, "abc", 0).expect("empty field");
        assert!(out.is_empty());
    }

    #[derive(Debug, PartialEq, Eq)]
    enum Cp1252TestError {
        Io,
        Encode { context: String },
    }

    impl From<std::io::Error> for Cp1252TestError {
        fn from(_value: std::io::Error) -> Self {
            Self::Io
        }
    }

    #[test]
    fn writes_cp1252_text_with_context_mapping() {
        let mut out = Vec::new();
        write_cp1252(&mut out, "café", "payload".into(), |context, _source| {
            Cp1252TestError::Encode { context }
        })
        .expect("cp1252 should encode");
        assert_eq!(out, b"caf\xe9");
    }

    #[test]
    fn reports_cp1252_encoding_failures_with_context() {
        let mut out = Vec::new();
        let err = write_cp1252(
            &mut out,
            "emoji \u{1f600}",
            "payload".into(),
            |context, _source| Cp1252TestError::Encode { context },
        )
        .expect_err("must fail");
        assert_eq!(
            err,
            Cp1252TestError::Encode {
                context: "payload".into()
            }
        );
    }

    #[test]
    fn matches_resource_keys_case_insensitively() {
        assert!(matches_resource_key(
            "P_Bastila",
            2014_u16,
            "p_bastila",
            2014_u16
        ));
        assert!(!matches_resource_key(
            "P_Bastila",
            2014_u16,
            "p_bastila",
            2015_u16
        ));
        assert!(!matches_resource_key(
            "P_Bastila",
            2014_u16,
            "p_carth",
            2014_u16
        ));
    }
}