rakata_formats/twoda/
reader.rs

1//! 2DA binary reader.
2
3use std::io::Read;
4
5use rakata_core::decode_text_strict;
6
7use super::{
8    binary, TwoDa, TwoDaBinaryError, TwoDaBinaryOptions, TwoDaRow, TWODA_MAGIC, TWODA_VERSION_V2B,
9};
10
11/// Reads a binary 2DA from a reader.
12///
13/// The stream is consumed from its current position.
14#[cfg_attr(
15    feature = "tracing",
16    tracing::instrument(level = "debug", skip(reader))
17)]
18pub fn read_twoda<R: Read>(reader: &mut R) -> Result<TwoDa, TwoDaBinaryError> {
19    read_twoda_with_options(reader, TwoDaBinaryOptions::default())
20}
21
22/// Reads a binary 2DA from a reader with explicit text options.
23///
24/// The stream is consumed from its current position.
25#[cfg_attr(
26    feature = "tracing",
27    tracing::instrument(level = "debug", skip(reader, options))
28)]
29pub fn read_twoda_with_options<R: Read>(
30    reader: &mut R,
31    options: TwoDaBinaryOptions,
32) -> Result<TwoDa, TwoDaBinaryError> {
33    let mut bytes = Vec::new();
34    reader.read_to_end(&mut bytes)?;
35    read_twoda_from_bytes_with_options(&bytes, options)
36}
37
38/// Reads a binary 2DA (`2DA V2.b`) from in-memory bytes.
39#[cfg_attr(
40    feature = "tracing",
41    tracing::instrument(level = "debug", skip(bytes), fields(bytes_len = bytes.len()))
42)]
43pub fn read_twoda_from_bytes(bytes: &[u8]) -> Result<TwoDa, TwoDaBinaryError> {
44    read_twoda_from_bytes_with_options(bytes, TwoDaBinaryOptions::default())
45}
46
47/// Reads a binary 2DA (`2DA V2.b`) from in-memory bytes with explicit text options.
48#[cfg_attr(
49    feature = "tracing",
50    tracing::instrument(level = "debug", skip(bytes, options), fields(bytes_len = bytes.len()))
51)]
52pub fn read_twoda_from_bytes_with_options(
53    bytes: &[u8],
54    options: TwoDaBinaryOptions,
55) -> Result<TwoDa, TwoDaBinaryError> {
56    if bytes.len() < 9 {
57        return Err(TwoDaBinaryError::InvalidHeader(
58            "file smaller than 2DA header".into(),
59        ));
60    }
61
62    let magic = binary::read_fourcc(bytes, 0)?;
63    if magic != TWODA_MAGIC {
64        return Err(TwoDaBinaryError::InvalidMagic(magic));
65    }
66
67    let version = binary::read_fourcc(bytes, 4)?;
68    if version != TWODA_VERSION_V2B {
69        return Err(TwoDaBinaryError::InvalidVersion(version));
70    }
71
72    if bytes[8] != b'\n' {
73        return Err(TwoDaBinaryError::InvalidHeader(
74            "missing newline after 2DA version".into(),
75        ));
76    }
77
78    let mut cursor = 9;
79    let headers_end = find_byte(bytes, cursor, b'\0')
80        .ok_or_else(|| TwoDaBinaryError::InvalidHeader("missing header terminator".into()))?;
81    let mut headers = split_tabbed(&bytes[cursor..headers_end])
82        .into_iter()
83        .enumerate()
84        .map(|(index, token)| {
85            decode_text_strict(token, options.text_encoding).map_err(|source| {
86                TwoDaBinaryError::TextDecoding {
87                    context: format!("header[{index}]"),
88                    source,
89                }
90            })
91        })
92        .collect::<Result<Vec<_>, _>>()?;
93    if headers.last().is_some_and(String::is_empty) {
94        headers.pop();
95    }
96    cursor = headers_end + 1;
97
98    let row_count = usize::try_from(binary::read_u32(bytes, cursor)?)
99        .map_err(|_| TwoDaBinaryError::InvalidHeader("row count does not fit usize".into()))?;
100    cursor = cursor
101        .checked_add(4)
102        .ok_or_else(|| TwoDaBinaryError::InvalidHeader("row count cursor overflow".into()))?;
103
104    let mut rows = Vec::with_capacity(row_count);
105    for row_index in 0..row_count {
106        let label_end = find_byte(bytes, cursor, b'\t').ok_or_else(|| {
107            TwoDaBinaryError::InvalidHeader(format!(
108                "missing tab terminator for row label {row_index}"
109            ))
110        })?;
111        let label = decode_text_strict(&bytes[cursor..label_end], options.text_encoding).map_err(
112            |source| TwoDaBinaryError::TextDecoding {
113                context: format!("row label[{row_index}]"),
114                source,
115            },
116        )?;
117        rows.push(TwoDaRow {
118            label,
119            cells: vec![String::new(); headers.len()],
120        });
121        cursor = label_end
122            .checked_add(1)
123            .ok_or_else(|| TwoDaBinaryError::InvalidHeader("row label cursor overflow".into()))?;
124    }
125
126    let cell_count = row_count
127        .checked_mul(headers.len())
128        .ok_or_else(|| TwoDaBinaryError::InvalidHeader("cell count overflow".into()))?;
129    let offsets_bytes = cell_count
130        .checked_mul(2)
131        .ok_or_else(|| TwoDaBinaryError::InvalidHeader("offset table size overflow".into()))?;
132    if cursor
133        .checked_add(offsets_bytes + 2)
134        .is_none_or(|end| end > bytes.len())
135    {
136        return Err(TwoDaBinaryError::InvalidHeader(
137            "offset table exceeds file length".into(),
138        ));
139    }
140
141    let mut cell_offsets = Vec::with_capacity(cell_count);
142    for _ in 0..cell_count {
143        cell_offsets.push(binary::read_u16(bytes, cursor)?);
144        cursor = cursor
145            .checked_add(2)
146            .ok_or_else(|| TwoDaBinaryError::InvalidHeader("offset cursor overflow".into()))?;
147    }
148
149    let cell_data_size = usize::from(binary::read_u16(bytes, cursor)?);
150    cursor = cursor
151        .checked_add(2)
152        .ok_or_else(|| TwoDaBinaryError::InvalidHeader("cell data cursor overflow".into()))?;
153    let cell_data_end = cursor
154        .checked_add(cell_data_size)
155        .ok_or_else(|| TwoDaBinaryError::InvalidHeader("cell data end overflow".into()))?;
156    if cell_data_end > bytes.len() {
157        return Err(TwoDaBinaryError::InvalidHeader(
158            "cell string table exceeds file length".into(),
159        ));
160    }
161    let cell_data = &bytes[cursor..cell_data_end];
162
163    if !headers.is_empty() {
164        for (cell_index, offset) in cell_offsets.iter().enumerate() {
165            let offset = usize::from(*offset);
166            if offset >= cell_data_size {
167                return Err(TwoDaBinaryError::InvalidHeader(format!(
168                    "cell offset {offset} out of range for cell index {cell_index}"
169                )));
170            }
171
172            let value_end = find_byte(cell_data, offset, b'\0').ok_or_else(|| {
173                TwoDaBinaryError::InvalidHeader(format!(
174                    "missing NUL terminator for cell index {cell_index}"
175                ))
176            })?;
177            let row_index = cell_index / headers.len();
178            let column_index = cell_index % headers.len();
179            let value = decode_text_strict(&cell_data[offset..value_end], options.text_encoding)
180                .map_err(|source| TwoDaBinaryError::TextDecoding {
181                    context: format!("cell[{row_index}][{column_index}]"),
182                    source,
183                })?;
184            rows[row_index].cells[column_index] = value;
185        }
186    }
187
188    Ok(TwoDa { headers, rows })
189}
190
/// Returns the absolute index of the first `needle` at or after `start`.
///
/// Yields `None` when `start` lies beyond the end of `bytes` or when no
/// matching byte exists in the searched tail.
fn find_byte(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    let tail = bytes.get(start..)?;
    let relative = tail.iter().position(|&candidate| candidate == needle)?;
    Some(start + relative)
}
198
/// Splits `bytes` into the fields separated by tab characters.
///
/// Empty input produces no fields at all. Otherwise every tab ends a field,
/// so a trailing tab yields a final empty field.
fn split_tabbed(bytes: &[u8]) -> Vec<&[u8]> {
    if bytes.is_empty() {
        Vec::new()
    } else {
        bytes.split(|&byte| byte == b'\t').collect()
    }
}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::twoda::{
        write_twoda_to_vec, write_twoda_to_vec_with_options, TwoDaBinaryOptions, TWODA_MAGIC,
        TWODA_VERSION_V2B,
    };
    use rakata_core::TextEncoding;

    /// Polish sample text used by the encoding tests: it is expected to
    /// round-trip under Windows-1250 and to be rejected by the default options.
    const CP1250_SAMPLE: &str =
        "Za\u{017c}\u{00f3}\u{0142}\u{0107} g\u{0119}\u{015b}l\u{0105} ja\u{017a}\u{0144}";

    /// Builds the 3x3 fixture shared by the round-trip and determinism tests.
    ///
    /// Row labels and headers are deliberately unsorted, values repeat across
    /// cells, and one cell is empty.
    fn sample_table() -> TwoDa {
        let mut table = TwoDa::new(vec!["col3".into(), "col2".into(), "col1".into()]);
        table
            .push_row("10", vec!["ghi".into(), "def".into(), "abc".into()])
            .expect("valid row");
        table
            .push_row("1", vec!["123".into(), "ghi".into(), "def".into()])
            .expect("valid row");
        table
            .push_row("2", vec!["abc".into(), "".into(), "123".into()])
            .expect("valid row");
        table
    }

    #[test]
    fn roundtrip_twoda_binary() {
        let table = sample_table();

        let bytes = write_twoda_to_vec(&table).expect("write should succeed");
        let parsed = read_twoda_from_bytes(&bytes).expect("read should succeed");

        assert_eq!(parsed, table);
        assert_eq!(parsed.cell(0, "col1"), Some("abc"));
        assert_eq!(parsed.cell(2, "col2"), Some(""));
    }

    #[test]
    fn writer_is_deterministic_for_synthetic_twoda() {
        let table = sample_table();

        let first = write_twoda_to_vec(&table).expect("first write should succeed");
        let second = write_twoda_to_vec(&table).expect("second write should succeed");
        assert_eq!(first, second, "canonical 2DA writer output drifted");
    }

    #[test]
    fn rejects_truncated_header() {
        // One byte short of the 9-byte signature.
        let bytes = vec![0_u8; 8];
        let err = read_twoda_from_bytes(&bytes).expect_err("must fail");
        assert!(matches!(err, TwoDaBinaryError::InvalidHeader(_)));
    }

    #[test]
    fn rejects_invalid_magic() {
        let mut bytes = vec![0_u8; 9];
        bytes[0..4].copy_from_slice(b"BAD!");
        bytes[4..8].copy_from_slice(&TWODA_VERSION_V2B);
        bytes[8] = b'\n';

        let err = read_twoda_from_bytes(&bytes).expect_err("must fail");
        assert!(matches!(err, TwoDaBinaryError::InvalidMagic(_)));
    }

    #[test]
    fn rejects_invalid_version() {
        let mut bytes = vec![0_u8; 9];
        bytes[0..4].copy_from_slice(&TWODA_MAGIC);
        bytes[4..8].copy_from_slice(b"V2.0");
        bytes[8] = b'\n';

        let err = read_twoda_from_bytes(&bytes).expect_err("must fail");
        assert!(matches!(err, TwoDaBinaryError::InvalidVersion(_)));
    }

    #[test]
    fn writer_rejects_unencodable_text() {
        let mut table = TwoDa::new(vec!["col".into()]);
        table
            .push_row("0", vec!["emoji \u{1f600}".into()])
            .expect("valid table shape");

        let err = write_twoda_to_vec(&table).expect_err("must fail");
        match err {
            TwoDaBinaryError::TextEncoding { context, source } => {
                assert!(context.contains("cell[0][0]"));
                assert_eq!(source.character, '\u{1f600}');
            }
            other => panic!("unexpected error variant: {other}"),
        }
    }

    #[test]
    fn writer_rejects_row_width_mismatch() {
        let mut table = TwoDa::new(vec!["col1".into(), "col2".into()]);
        // Push directly into `rows` to bypass the shape check in `push_row`.
        table.rows.push(TwoDaRow {
            label: "0".into(),
            cells: vec!["only_one".into()],
        });

        let err = write_twoda_to_vec(&table).expect_err("must fail");
        assert!(matches!(err, TwoDaBinaryError::InvalidTable(_)));
    }

    #[test]
    fn writer_deduplicates_identical_cell_strings() {
        let mut table = TwoDa::new(vec!["a".into(), "b".into()]);
        table
            .push_row("0", vec!["same".into(), "x".into()])
            .expect("valid row");
        table
            .push_row("1", vec!["same".into(), "y".into()])
            .expect("valid row");

        let bytes = write_twoda_to_vec(&table).expect("write should succeed");
        let offsets =
            extract_cell_offsets(&bytes, table.headers.len()).expect("must parse offsets");
        assert_eq!(offsets.len(), 4);
        // Cells [0][0] and [1][0] both hold "same" and must share one offset.
        assert_eq!(offsets[0], offsets[2]);
    }

    #[test]
    fn configurable_encoding_supports_cp1250_data() {
        let mut table = TwoDa::new(vec!["text".into()]);
        table
            .push_row("0", vec![CP1250_SAMPLE.into()])
            .expect("valid row");
        let options = TwoDaBinaryOptions {
            text_encoding: TextEncoding::Windows1250,
        };

        let bytes = write_twoda_to_vec_with_options(&table, options).expect("write should succeed");
        let parsed =
            read_twoda_from_bytes_with_options(&bytes, options).expect("read should succeed");
        assert_eq!(parsed.cell(0, "text"), Some(CP1250_SAMPLE));
    }

    #[test]
    fn default_encoding_rejects_cp1250_specific_characters() {
        let mut table = TwoDa::new(vec!["text".into()]);
        table
            .push_row("0", vec![CP1250_SAMPLE.into()])
            .expect("valid row");

        let err = write_twoda_to_vec(&table).expect_err("must fail");
        assert!(matches!(err, TwoDaBinaryError::TextEncoding { .. }));
    }

    /// Returns the raw `u16` cell offsets of a serialized table.
    ///
    /// Skips the 9-byte signature, the NUL-terminated header line, and the
    /// tab-terminated row labels, then reads `row_count * column_count`
    /// offsets. The caller supplies `column_count` because this helper does
    /// not parse the header line itself.
    fn extract_cell_offsets(
        bytes: &[u8],
        column_count: usize,
    ) -> Result<Vec<u16>, TwoDaBinaryError> {
        let mut cursor = 9;
        let headers_end = find_byte(bytes, cursor, b'\0')
            .ok_or_else(|| TwoDaBinaryError::InvalidHeader("missing header terminator".into()))?;
        cursor = headers_end + 1;

        let row_count = usize::try_from(binary::read_u32(bytes, cursor)?)
            .map_err(|_| TwoDaBinaryError::InvalidHeader("row count conversion failed".into()))?;
        cursor += 4;
        for _ in 0..row_count {
            let label_end = find_byte(bytes, cursor, b'\t')
                .ok_or_else(|| TwoDaBinaryError::InvalidHeader("missing row label tab".into()))?;
            cursor = label_end + 1;
        }

        let cell_count = row_count * column_count;
        let mut offsets = Vec::with_capacity(cell_count);
        for _ in 0..cell_count {
            offsets.push(binary::read_u16(bytes, cursor)?);
            cursor += 2;
        }
        Ok(offsets)
    }
}