rakata_formats/twoda/
mod.rs

1//! 2DA `V2.b` binary reader and writer.
2//!
3//! 2DA is a compact table format used for gameplay metadata. The binary form
4//! stores column headers, row labels, a cell-offset matrix, and a shared string
5//! table.
6//!
7//! ## Format Layout
8//! ```text
9//! +------------------------------+
10//! | "2DA " + "V2.b" + '\n'       |
11//! +------------------------------+
12//! | Header row text              |
13//! | tab-separated, NUL-terminated|
14//! +------------------------------+
15//! | Row count (u32)              |
16//! +------------------------------+
17//! | Row labels                   |
18//! | each terminated by '\t'      |
19//! +------------------------------+
20//! | Cell offset table            |
21//! | u16 * (rows * columns)       |
22//! +------------------------------+
23//! | String table blob            |
24//! | NUL-terminated cell strings  |
25//! +------------------------------+
26//! ```
27//!
28//! Writer output is deterministic: repeated cell strings are deduplicated in
29//! the string table while preserving row and column order.
30
31mod reader;
32mod writer;
33
34pub use reader::{
35    read_twoda, read_twoda_from_bytes, read_twoda_from_bytes_with_options, read_twoda_with_options,
36};
37pub use writer::{
38    write_twoda, write_twoda_to_vec, write_twoda_to_vec_with_options, write_twoda_with_options,
39};
40
41use std::io::Read;
42use thiserror::Error;
43
44#[cfg(feature = "serde")]
45use serde::{Deserialize, Serialize};
46
47use rakata_core::{DecodeTextError, EncodeTextError, TextEncoding};
48
49use crate::binary::{self, DecodeBinary, EncodeBinary};
50
/// 2DA file type marker: the four bytes `2DA ` (note the trailing space) that
/// open the binary layout described in the module documentation.
const TWODA_MAGIC: [u8; 4] = *b"2DA ";
/// Binary 2DA format version used by KotOR: the four bytes `V2.b` that follow
/// the file type marker.
const TWODA_VERSION_V2B: [u8; 4] = *b"V2.b";
55
/// Encoding options for binary 2DA reader/writer operations.
///
/// The [`Default`] implementation selects [`TextEncoding::Windows1252`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TwoDaBinaryOptions {
    /// Text encoding used for headers, row labels, and cell values.
    pub text_encoding: TextEncoding,
}
62
63impl Default for TwoDaBinaryOptions {
64    fn default() -> Self {
65        Self {
66            text_encoding: TextEncoding::Windows1252,
67        }
68    }
69}
70
/// In-memory representation of a binary 2DA table.
///
/// The structure keeps header order and row order stable so a write->read
/// roundtrip remains deterministic.
///
/// Every row is expected to carry exactly one cell per header.
/// [`TwoDa::push_row`] enforces this for rows appended through it; because
/// both fields are public, rows pushed directly into `rows` are not checked
/// at that point.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct TwoDa {
    /// Ordered column headers. A cell's position inside a row corresponds to
    /// the position of its header in this list.
    pub headers: Vec<String>,
    /// Ordered table rows.
    pub rows: Vec<TwoDaRow>,
}
83
84impl TwoDa {
85    /// Creates an empty table with no headers and no rows.
86    pub fn empty() -> Self {
87        Self {
88            headers: Vec::new(),
89            rows: Vec::new(),
90        }
91    }
92
93    /// Creates an empty table with the provided headers.
94    pub fn new(headers: Vec<String>) -> Self {
95        Self {
96            headers,
97            rows: Vec::new(),
98        }
99    }
100
101    /// Appends a row, validating that cell count matches header count.
102    pub fn push_row(
103        &mut self,
104        label: impl Into<String>,
105        cells: Vec<String>,
106    ) -> Result<(), TwoDaBinaryError> {
107        if cells.len() != self.headers.len() {
108            return Err(TwoDaBinaryError::InvalidTable(format!(
109                "row width {} does not match header width {}",
110                cells.len(),
111                self.headers.len()
112            )));
113        }
114        self.rows.push(TwoDaRow {
115            label: label.into(),
116            cells,
117        });
118        Ok(())
119    }
120
121    /// Returns the number of rows.
122    pub fn row_count(&self) -> usize {
123        self.rows.len()
124    }
125
126    /// Returns the number of columns.
127    pub fn column_count(&self) -> usize {
128        self.headers.len()
129    }
130
131    /// Returns a cell by row index and column header name.
132    pub fn cell(&self, row_index: usize, column_name: &str) -> Option<&str> {
133        let column_index = self
134            .headers
135            .iter()
136            .position(|header| header == column_name)?;
137        self.rows
138            .get(row_index)
139            .and_then(|row| row.cells.get(column_index))
140            .map(String::as_str)
141    }
142}
143
/// Decodes a [`TwoDa`] from an in-memory byte slice.
impl DecodeBinary for TwoDa {
    /// Failures are reported with this module's error type.
    type Error = TwoDaBinaryError;

    /// Parses `bytes` by delegating to [`read_twoda_from_bytes`].
    fn decode_binary(bytes: &[u8]) -> Result<Self, Self::Error> {
        read_twoda_from_bytes(bytes)
    }
}
151
/// Encodes a [`TwoDa`] into an owned byte vector.
impl EncodeBinary for TwoDa {
    /// Failures are reported with this module's error type.
    type Error = TwoDaBinaryError;

    /// Serializes the table by delegating to [`write_twoda_to_vec`].
    fn encode_binary(&self) -> Result<Vec<u8>, Self::Error> {
        write_twoda_to_vec(self)
    }
}
159
/// One row in a 2DA table.
///
/// A row pairs a label with its cell values; [`TwoDa::push_row`] creates rows
/// with one cell per table header.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct TwoDaRow {
    /// Row label written in the row-label table.
    pub label: String,
    /// Ordered cell values aligned with [`TwoDa::headers`]: the cell at
    /// position `i` belongs to the header at position `i`.
    pub cells: Vec<String>,
}
169
/// Errors produced while parsing or serializing binary 2DA data.
///
/// The same type is also returned by the CSV/JSON helpers and by
/// [`TwoDa::push_row`] in this module.
#[derive(Debug, Error)]
pub enum TwoDaBinaryError {
    /// I/O read/write failure. Created automatically from [`std::io::Error`]
    /// when `?` is used on an I/O result.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Header magic does not match `2DA `. Carries the four bytes that were
    /// found instead.
    #[error("invalid 2DA magic: {0:?}")]
    InvalidMagic([u8; 4]),
    /// Header version is unsupported. Carries the four version bytes that
    /// were found.
    #[error("invalid 2DA version: {0:?}")]
    InvalidVersion([u8; 4]),
    /// Header/body layout is invalid or truncated. Also used for converted
    /// `binary::BinaryLayoutError` values, whose message becomes the payload.
    #[error("invalid 2DA header: {0}")]
    InvalidHeader(String),
    /// In-memory table content is structurally invalid for writing (for
    /// example a row whose width differs from the header count, or text that
    /// contains a reserved delimiter).
    #[error("invalid 2DA table: {0}")]
    InvalidTable(String),
    /// Value cannot fit the target on-disk integer width. The payload names
    /// the field being written.
    #[error("value overflow while writing field `{0}`")]
    ValueOverflow(&'static str),
    /// Text cannot be represented losslessly as the configured target encoding.
    #[error("2DA text encoding failed for {context}: {source}")]
    TextEncoding {
        /// Context of the value being encoded.
        context: String,
        /// Source encoding error with exact character location.
        #[source]
        source: EncodeTextError,
    },
    /// Text bytes cannot be decoded losslessly using the configured encoding.
    #[error("2DA text decoding failed for {context}: {source}")]
    TextDecoding {
        /// Context of the value being decoded.
        context: String,
        /// Source decoding error with byte position details.
        #[source]
        source: DecodeTextError,
    },
}
210
/// Reports low-level binary layout failures as
/// [`TwoDaBinaryError::InvalidHeader`].
impl From<binary::BinaryLayoutError> for TwoDaBinaryError {
    /// Keeps only the layout error's display text; the original error value
    /// is not retained as a source.
    fn from(error: binary::BinaryLayoutError) -> Self {
        Self::InvalidHeader(error.to_string())
    }
}
216
217fn validate_twoda(twoda: &TwoDa) -> Result<(), TwoDaBinaryError> {
218    for (index, header) in twoda.headers.iter().enumerate() {
219        if header.contains('\0') || header.contains('\t') {
220            return Err(TwoDaBinaryError::InvalidTable(format!(
221                "header[{index}] contains reserved delimiter"
222            )));
223        }
224    }
225    for (row_index, row) in twoda.rows.iter().enumerate() {
226        if row.cells.len() != twoda.headers.len() {
227            return Err(TwoDaBinaryError::InvalidTable(format!(
228                "row[{row_index}] width {} does not match header width {}",
229                row.cells.len(),
230                twoda.headers.len()
231            )));
232        }
233        if row.label.contains('\0') || row.label.contains('\t') {
234            return Err(TwoDaBinaryError::InvalidTable(format!(
235                "row label[{row_index}] contains reserved delimiter"
236            )));
237        }
238        for (column_index, value) in row.cells.iter().enumerate() {
239            if value.contains('\0') {
240                return Err(TwoDaBinaryError::InvalidTable(format!(
241                    "cell[{row_index}][{column_index}] contains NUL byte"
242                )));
243            }
244        }
245    }
246    Ok(())
247}
248
249//
250// Serde Support (CSV/JSON)
251//
252
253#[cfg(feature = "serde")]
254mod serde_impl {
255    use super::*;
256
257    pub mod csv_impl {
258        use super::*;
259        use csv::{ReaderBuilder, StringRecord, WriterBuilder};
260        use std::io::{Cursor, Write};
261
262        /// Reads a 2DA from CSV format.
263        pub fn read_twoda_from_csv<R: Read>(reader: &mut R) -> Result<TwoDa, TwoDaBinaryError> {
264            let mut csv_reader = ReaderBuilder::new()
265                .has_headers(true)
266                .flexible(true)
267                .trim(::csv::Trim::All)
268                .from_reader(reader);
269
270            let headers: Vec<String> = csv_reader
271                .headers()
272                .map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))?
273                .iter()
274                .map(|s| s.to_string())
275                .collect();
276
277            // Interoperability Convention:
278            // The first column in 2DA CSVs is reserved for the row label.
279            // Some tools explicitly name it "label", while others leave it implicit.
280            // We always strip the first column from the header list to treat it as metadata.
281            let actual_headers = if headers.is_empty() {
282                Vec::new()
283            } else {
284                headers[1..].to_vec()
285            };
286
287            let mut twoda = TwoDa::new(actual_headers);
288
289            for result in csv_reader.records() {
290                let record = result.map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))?;
291                if record.is_empty() {
292                    continue;
293                }
294
295                // First field is the row label
296                let label = record.get(0).unwrap_or_default().to_string();
297                // Remaining fields are cells
298                let cells: Vec<String> = record.iter().skip(1).map(|s| s.to_string()).collect();
299
300                // Ensure row width matches the header count.
301                if cells.len() != twoda.headers.len() {
302                    return Err(TwoDaBinaryError::InvalidTable(format!(
303                        "row width mismatch: expected {}, got {}",
304                        twoda.headers.len(),
305                        cells.len()
306                    )));
307                }
308
309                twoda.push_row(label, cells)?;
310            }
311
312            Ok(twoda)
313        }
314
315        /// Writes a 2DA to CSV format.
316        pub fn write_twoda_to_csv<W: Write>(
317            writer: &mut W,
318            twoda: &TwoDa,
319        ) -> Result<(), TwoDaBinaryError> {
320            validate_twoda(twoda)?;
321
322            let mut csv_writer = WriterBuilder::new().has_headers(true).from_writer(writer);
323
324            // Write header row: label column + actual column headers
325            let mut header_record = StringRecord::new();
326            header_record.push_field("label");
327            for header in &twoda.headers {
328                header_record.push_field(header);
329            }
330            csv_writer
331                .write_record(&header_record)
332                .map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))?;
333
334            // Write data rows
335            for row in &twoda.rows {
336                let mut record = StringRecord::new();
337                record.push_field(&row.label);
338                for cell in &row.cells {
339                    record.push_field(cell);
340                }
341                csv_writer
342                    .write_record(&record)
343                    .map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))?;
344            }
345
346            csv_writer
347                .flush()
348                .map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))?;
349
350            Ok(())
351        }
352
353        /// Serializes a 2DA to a CSV byte vector.
354        pub fn write_twoda_to_csv_vec(twoda: &TwoDa) -> Result<Vec<u8>, TwoDaBinaryError> {
355            let mut cursor = Cursor::new(Vec::new());
356            write_twoda_to_csv(&mut cursor, twoda)?;
357            Ok(cursor.into_inner())
358        }
359
360        /// Parses a 2DA from CSV bytes.
361        pub fn read_twoda_from_csv_bytes(bytes: &[u8]) -> Result<TwoDa, TwoDaBinaryError> {
362            let mut cursor = Cursor::new(bytes);
363            read_twoda_from_csv(&mut cursor)
364        }
365    }
366
367    pub mod json_impl {
368        use super::*;
369        use serde_json::{from_slice, from_str, to_string_pretty, to_vec};
370
371        /// Serializes a 2DA to JSON.
372        pub fn write_twoda_to_json(twoda: &TwoDa) -> Result<String, TwoDaBinaryError> {
373            to_string_pretty(twoda).map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))
374        }
375
376        /// Serializes a 2DA to JSON bytes.
377        pub fn write_twoda_to_json_vec(twoda: &TwoDa) -> Result<Vec<u8>, TwoDaBinaryError> {
378            to_vec(twoda).map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))
379        }
380
381        /// Deserializes a 2DA from JSON.
382        pub fn read_twoda_from_json(json: &str) -> Result<TwoDa, TwoDaBinaryError> {
383            from_str(json).map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))
384        }
385
386        /// Deserializes a 2DA from JSON bytes.
387        pub fn read_twoda_from_json_bytes(bytes: &[u8]) -> Result<TwoDa, TwoDaBinaryError> {
388            from_slice(bytes).map_err(|e| TwoDaBinaryError::InvalidTable(e.to_string()))
389        }
390    }
391}
392
393#[cfg(feature = "serde")]
394pub use serde_impl::csv_impl::{
395    read_twoda_from_csv, read_twoda_from_csv_bytes, write_twoda_to_csv, write_twoda_to_csv_vec,
396};
397
398#[cfg(feature = "serde")]
399pub use serde_impl::json_impl::{
400    read_twoda_from_json, read_twoda_from_json_bytes, write_twoda_to_json, write_twoda_to_json_vec,
401};
402
403/// Auto-detects the format from file extension and reads accordingly.
404///
405/// Supported extensions (case-insensitive):
406/// - `.2da`, `.bif`, `.mod`, `.erf`, `.rim` (or unknown) -> binary 2DA
407/// - `.csv` -> CSV format
408/// - `.json` -> JSON format
409#[cfg_attr(
410    feature = "tracing",
411    tracing::instrument(level = "debug", skip(reader))
412)]
413pub fn read_twoda_auto<R: Read>(
414    reader: &mut R,
415    name_or_extension: &str,
416) -> Result<TwoDa, TwoDaBinaryError> {
417    let ext = if let Some(idx) = name_or_extension.rfind('.') {
418        &name_or_extension[idx + 1..]
419    } else {
420        name_or_extension
421    };
422
423    match ext.to_lowercase().as_str() {
424        "csv" => {
425            #[cfg(feature = "serde")]
426            {
427                read_twoda_from_csv(reader)
428            }
429            #[cfg(not(feature = "serde"))]
430            Err(TwoDaBinaryError::InvalidTable(
431                "CSV format requires serde feature".into(),
432            ))
433        }
434        "json" => {
435            #[cfg(feature = "serde")]
436            {
437                // JSON requires reading to end to parse
438                let mut bytes = Vec::new();
439                reader.read_to_end(&mut bytes)?;
440                read_twoda_from_json_bytes(&bytes)
441            }
442            #[cfg(not(feature = "serde"))]
443            Err(TwoDaBinaryError::InvalidTable(
444                "JSON format requires serde feature".into(),
445            ))
446        }
447        _ => read_twoda(reader),
448    }
449}