1use std::io::Read;
4
5use rakata_core::decode_text_strict;
6
7use super::{
8 binary, TwoDa, TwoDaBinaryError, TwoDaBinaryOptions, TwoDaRow, TWODA_MAGIC, TWODA_VERSION_V2B,
9};
10
11#[cfg_attr(
15 feature = "tracing",
16 tracing::instrument(level = "debug", skip(reader))
17)]
18pub fn read_twoda<R: Read>(reader: &mut R) -> Result<TwoDa, TwoDaBinaryError> {
19 read_twoda_with_options(reader, TwoDaBinaryOptions::default())
20}
21
22#[cfg_attr(
26 feature = "tracing",
27 tracing::instrument(level = "debug", skip(reader, options))
28)]
29pub fn read_twoda_with_options<R: Read>(
30 reader: &mut R,
31 options: TwoDaBinaryOptions,
32) -> Result<TwoDa, TwoDaBinaryError> {
33 let mut bytes = Vec::new();
34 reader.read_to_end(&mut bytes)?;
35 read_twoda_from_bytes_with_options(&bytes, options)
36}
37
38#[cfg_attr(
40 feature = "tracing",
41 tracing::instrument(level = "debug", skip(bytes), fields(bytes_len = bytes.len()))
42)]
43pub fn read_twoda_from_bytes(bytes: &[u8]) -> Result<TwoDa, TwoDaBinaryError> {
44 read_twoda_from_bytes_with_options(bytes, TwoDaBinaryOptions::default())
45}
46
47#[cfg_attr(
49 feature = "tracing",
50 tracing::instrument(level = "debug", skip(bytes, options), fields(bytes_len = bytes.len()))
51)]
52pub fn read_twoda_from_bytes_with_options(
53 bytes: &[u8],
54 options: TwoDaBinaryOptions,
55) -> Result<TwoDa, TwoDaBinaryError> {
56 if bytes.len() < 9 {
57 return Err(TwoDaBinaryError::InvalidHeader(
58 "file smaller than 2DA header".into(),
59 ));
60 }
61
62 let magic = binary::read_fourcc(bytes, 0)?;
63 if magic != TWODA_MAGIC {
64 return Err(TwoDaBinaryError::InvalidMagic(magic));
65 }
66
67 let version = binary::read_fourcc(bytes, 4)?;
68 if version != TWODA_VERSION_V2B {
69 return Err(TwoDaBinaryError::InvalidVersion(version));
70 }
71
72 if bytes[8] != b'\n' {
73 return Err(TwoDaBinaryError::InvalidHeader(
74 "missing newline after 2DA version".into(),
75 ));
76 }
77
78 let mut cursor = 9;
79 let headers_end = find_byte(bytes, cursor, b'\0')
80 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("missing header terminator".into()))?;
81 let mut headers = split_tabbed(&bytes[cursor..headers_end])
82 .into_iter()
83 .enumerate()
84 .map(|(index, token)| {
85 decode_text_strict(token, options.text_encoding).map_err(|source| {
86 TwoDaBinaryError::TextDecoding {
87 context: format!("header[{index}]"),
88 source,
89 }
90 })
91 })
92 .collect::<Result<Vec<_>, _>>()?;
93 if headers.last().is_some_and(String::is_empty) {
94 headers.pop();
95 }
96 cursor = headers_end + 1;
97
98 let row_count = usize::try_from(binary::read_u32(bytes, cursor)?)
99 .map_err(|_| TwoDaBinaryError::InvalidHeader("row count does not fit usize".into()))?;
100 cursor = cursor
101 .checked_add(4)
102 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("row count cursor overflow".into()))?;
103
104 let mut rows = Vec::with_capacity(row_count);
105 for row_index in 0..row_count {
106 let label_end = find_byte(bytes, cursor, b'\t').ok_or_else(|| {
107 TwoDaBinaryError::InvalidHeader(format!(
108 "missing tab terminator for row label {row_index}"
109 ))
110 })?;
111 let label = decode_text_strict(&bytes[cursor..label_end], options.text_encoding).map_err(
112 |source| TwoDaBinaryError::TextDecoding {
113 context: format!("row label[{row_index}]"),
114 source,
115 },
116 )?;
117 rows.push(TwoDaRow {
118 label,
119 cells: vec![String::new(); headers.len()],
120 });
121 cursor = label_end
122 .checked_add(1)
123 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("row label cursor overflow".into()))?;
124 }
125
126 let cell_count = row_count
127 .checked_mul(headers.len())
128 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("cell count overflow".into()))?;
129 let offsets_bytes = cell_count
130 .checked_mul(2)
131 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("offset table size overflow".into()))?;
132 if cursor
133 .checked_add(offsets_bytes + 2)
134 .is_none_or(|end| end > bytes.len())
135 {
136 return Err(TwoDaBinaryError::InvalidHeader(
137 "offset table exceeds file length".into(),
138 ));
139 }
140
141 let mut cell_offsets = Vec::with_capacity(cell_count);
142 for _ in 0..cell_count {
143 cell_offsets.push(binary::read_u16(bytes, cursor)?);
144 cursor = cursor
145 .checked_add(2)
146 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("offset cursor overflow".into()))?;
147 }
148
149 let cell_data_size = usize::from(binary::read_u16(bytes, cursor)?);
150 cursor = cursor
151 .checked_add(2)
152 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("cell data cursor overflow".into()))?;
153 let cell_data_end = cursor
154 .checked_add(cell_data_size)
155 .ok_or_else(|| TwoDaBinaryError::InvalidHeader("cell data end overflow".into()))?;
156 if cell_data_end > bytes.len() {
157 return Err(TwoDaBinaryError::InvalidHeader(
158 "cell string table exceeds file length".into(),
159 ));
160 }
161 let cell_data = &bytes[cursor..cell_data_end];
162
163 if !headers.is_empty() {
164 for (cell_index, offset) in cell_offsets.iter().enumerate() {
165 let offset = usize::from(*offset);
166 if offset >= cell_data_size {
167 return Err(TwoDaBinaryError::InvalidHeader(format!(
168 "cell offset {offset} out of range for cell index {cell_index}"
169 )));
170 }
171
172 let value_end = find_byte(cell_data, offset, b'\0').ok_or_else(|| {
173 TwoDaBinaryError::InvalidHeader(format!(
174 "missing NUL terminator for cell index {cell_index}"
175 ))
176 })?;
177 let row_index = cell_index / headers.len();
178 let column_index = cell_index % headers.len();
179 let value = decode_text_strict(&cell_data[offset..value_end], options.text_encoding)
180 .map_err(|source| TwoDaBinaryError::TextDecoding {
181 context: format!("cell[{row_index}][{column_index}]"),
182 source,
183 })?;
184 rows[row_index].cells[column_index] = value;
185 }
186 }
187
188 Ok(TwoDa { headers, rows })
189}
190
/// Returns the absolute index of the first `needle` in `bytes` at or after `start`.
///
/// Returns `None` when `start` is past the end of `bytes` or when `needle`
/// does not occur in `bytes[start..]`.
fn find_byte(bytes: &[u8], start: usize, needle: u8) -> Option<usize> {
    let tail = bytes.get(start..)?;
    for (relative, &candidate) in tail.iter().enumerate() {
        if candidate == needle {
            // `relative` indexes into `tail`, so shift back to an index into `bytes`.
            return Some(start + relative);
        }
    }
    None
}
198
/// Splits `bytes` on tab characters, returning the borrowed segments in order.
///
/// Empty input yields no segments at all. For non-empty input every tab
/// produces a boundary, so leading, trailing, and consecutive tabs yield
/// empty segments.
fn split_tabbed(bytes: &[u8]) -> Vec<&[u8]> {
    if bytes.is_empty() {
        // `slice::split` would yield a single empty segment here; callers
        // expect an empty list instead.
        Vec::new()
    } else {
        bytes.split(|&byte| byte == b'\t').collect()
    }
}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::twoda::{
        write_twoda_to_vec, write_twoda_to_vec_with_options, TwoDaBinaryOptions, TWODA_MAGIC,
        TWODA_VERSION_V2B,
    };
    use rakata_core::TextEncoding;

    /// Sample text the encoding tests expect Windows-1250 to round-trip and
    /// the default encoding to reject.
    const CP1250_SAMPLE: &str =
        "Za\u{017c}\u{00f3}\u{0142}\u{0107} g\u{0119}\u{015b}l\u{0105} ja\u{017a}\u{0144}";

    /// Builds the three-column, three-row table shared by the round-trip and
    /// determinism tests.
    fn synthetic_table() -> TwoDa {
        let mut table = TwoDa::new(vec!["col3".into(), "col2".into(), "col1".into()]);
        table
            .push_row("10", vec!["ghi".into(), "def".into(), "abc".into()])
            .expect("valid row");
        table
            .push_row("1", vec!["123".into(), "ghi".into(), "def".into()])
            .expect("valid row");
        table
            .push_row("2", vec!["abc".into(), "".into(), "123".into()])
            .expect("valid row");
        table
    }

    /// Writing a table and reading it back yields an equal table.
    #[test]
    fn roundtrip_twoda_binary() {
        let original = synthetic_table();

        let encoded = write_twoda_to_vec(&original).expect("write should succeed");
        let decoded = read_twoda_from_bytes(&encoded).expect("read should succeed");

        assert_eq!(decoded, original);
        assert_eq!(decoded.cell(0, "col1"), Some("abc"));
        assert_eq!(decoded.cell(2, "col2"), Some(""));
    }

    /// Two writes of the same table produce byte-identical output.
    #[test]
    fn writer_is_deterministic_for_synthetic_twoda() {
        let table = synthetic_table();

        let first = write_twoda_to_vec(&table).expect("first write should succeed");
        let second = write_twoda_to_vec(&table).expect("second write should succeed");
        assert_eq!(first, second, "canonical 2DA writer output drifted");
    }

    /// Input shorter than the 9-byte preamble is rejected.
    #[test]
    fn rejects_truncated_header() {
        let too_short = vec![0_u8; 8];
        let error = read_twoda_from_bytes(&too_short).expect_err("must fail");
        assert!(matches!(error, TwoDaBinaryError::InvalidHeader(_)));
    }

    /// A wrong magic fourcc is reported as `InvalidMagic`.
    #[test]
    fn rejects_invalid_magic() {
        let mut preamble = vec![0_u8; 9];
        preamble[0..4].copy_from_slice(b"BAD!");
        preamble[4..8].copy_from_slice(&TWODA_VERSION_V2B);
        preamble[8] = b'\n';

        let error = read_twoda_from_bytes(&preamble).expect_err("must fail");
        assert!(matches!(error, TwoDaBinaryError::InvalidMagic(_)));
    }

    /// A wrong version fourcc is reported as `InvalidVersion`.
    #[test]
    fn rejects_invalid_version() {
        let mut preamble = vec![0_u8; 9];
        preamble[0..4].copy_from_slice(&TWODA_MAGIC);
        preamble[4..8].copy_from_slice(b"V2.0");
        preamble[8] = b'\n';

        let error = read_twoda_from_bytes(&preamble).expect_err("must fail");
        assert!(matches!(error, TwoDaBinaryError::InvalidVersion(_)));
    }

    /// The writer reports the offending cell and character when text cannot
    /// be encoded.
    #[test]
    fn writer_rejects_unencodable_text() {
        let mut table = TwoDa::new(vec!["col".into()]);
        table
            .push_row("0", vec!["emoji \u{1f600}".into()])
            .expect("valid table shape");

        let error = write_twoda_to_vec(&table).expect_err("must fail");
        match error {
            TwoDaBinaryError::TextEncoding { context, source } => {
                assert!(context.contains("cell[0][0]"));
                assert_eq!(source.character, '\u{1f600}');
            }
            other => panic!("unexpected error variant: {other}"),
        }
    }

    /// A row whose cell count differs from the header count is rejected by
    /// the writer.
    #[test]
    fn writer_rejects_row_width_mismatch() {
        let mut table = TwoDa::new(vec!["col1".into(), "col2".into()]);
        // Push directly onto `rows` to bypass `push_row`.
        table.rows.push(TwoDaRow {
            label: "0".into(),
            cells: vec!["only_one".into()],
        });

        let error = write_twoda_to_vec(&table).expect_err("must fail");
        assert!(matches!(error, TwoDaBinaryError::InvalidTable(_)));
    }

    /// Identical cell strings share one string-table offset.
    #[test]
    fn writer_deduplicates_identical_cell_strings() {
        let mut table = TwoDa::new(vec!["a".into(), "b".into()]);
        table
            .push_row("0", vec!["same".into(), "x".into()])
            .expect("valid row");
        table
            .push_row("1", vec!["same".into(), "y".into()])
            .expect("valid row");

        let encoded = write_twoda_to_vec(&table).expect("write should succeed");
        let offsets = extract_cell_offsets(&encoded).expect("must parse offsets");
        assert_eq!(offsets.len(), 4);
        // Cells [0][0] and [1][0] both hold "same".
        assert_eq!(offsets[0], offsets[2]);
    }

    /// Text round-trips when both writer and reader use Windows-1250.
    #[test]
    fn configurable_encoding_supports_cp1250_data() {
        let mut table = TwoDa::new(vec!["text".into()]);
        table
            .push_row("0", vec![CP1250_SAMPLE.into()])
            .expect("valid row");
        let options = TwoDaBinaryOptions {
            text_encoding: TextEncoding::Windows1250,
        };

        let encoded =
            write_twoda_to_vec_with_options(&table, options).expect("write should succeed");
        let decoded =
            read_twoda_from_bytes_with_options(&encoded, options).expect("read should succeed");
        assert_eq!(decoded.cell(0, "text"), Some(CP1250_SAMPLE));
    }

    /// The same text is rejected by the writer under the default encoding.
    #[test]
    fn default_encoding_rejects_cp1250_specific_characters() {
        let mut table = TwoDa::new(vec!["text".into()]);
        table
            .push_row("0", vec![CP1250_SAMPLE.into()])
            .expect("valid row");

        let error = write_twoda_to_vec(&table).expect_err("must fail");
        assert!(matches!(error, TwoDaBinaryError::TextEncoding { .. }));
    }

    /// Extracts the raw cell offset table from an encoded 2DA buffer.
    ///
    /// Test-only helper: it skips the preamble, headers, and row labels, then
    /// reads two offsets per row, so it is only valid for two-column tables.
    fn extract_cell_offsets(bytes: &[u8]) -> Result<Vec<u16>, TwoDaBinaryError> {
        // Skip the 9-byte preamble and the NUL-terminated header line.
        let header_terminator = find_byte(bytes, 9, b'\0')
            .ok_or_else(|| TwoDaBinaryError::InvalidHeader("missing header terminator".into()))?;
        let mut position = header_terminator + 1;

        let row_count = usize::try_from(binary::read_u32(bytes, position)?)
            .map_err(|_| TwoDaBinaryError::InvalidHeader("row count conversion failed".into()))?;
        position += 4;

        // Skip one tab-terminated label per row.
        for _ in 0..row_count {
            let label_end = find_byte(bytes, position, b'\t')
                .ok_or_else(|| TwoDaBinaryError::InvalidHeader("missing row label tab".into()))?;
            position = label_end + 1;
        }

        // Hard-coded for the two-column fixture used by the dedup test.
        let offset_count = row_count * 2;
        let mut offsets = Vec::with_capacity(offset_count);
        for _ in 0..offset_count {
            offsets.push(binary::read_u16(bytes, position)?);
            position += 2;
        }
        Ok(offsets)
    }
}