rakata_formats/gff/
mod.rs

1//! GFF V3.2 binary reader and writer.
2//!
3//! GFF is a typed, table-backed binary graph format used for most KotOR game
4//! data objects (`UTC`, `UTI`, `ARE`, `DLG`, and others).
5//!
6//! ## Format Layout
7//! ```text
8//! +------------------------------+ 0x0000
9//! | Header (56 bytes)            |
10//! | file_type + version +        |
11//! | offsets/counts for tables    |
12//! +------------------------------+ struct_offset
13//! | Struct table                 |
14//! | 12 bytes * struct_count      |
15//! +------------------------------+ field_offset
16//! | Field table                  |
17//! | 12 bytes * field_count       |
18//! +------------------------------+ label_offset
19//! | Label table                  |
20//! | 16 bytes * label_count       |
21//! +------------------------------+ field_data_offset
22//! | Field data blob              |
23//! +------------------------------+ field_indices_offset
24//! | Field indices array (u32)    |
25//! +------------------------------+ list_indices_offset
26//! | List indices array (u32)     |
27//! +------------------------------+
28//! ```
29//!
30//! ## Logical Data Model
31//! ```text
32//! Gff
33//!  `-- root GffStruct
34//!       `-- [GffField(label, GffValue)]
35//!            `-- nested structs/lists recursively reference tables
36//! ```
37//!
38//! Offsets in struct and field records reference table locations or blob
39//! offsets depending on field type. This module keeps those rules explicit and
40//! validates all ranges before decoding.
41
42mod label;
43mod reader;
44mod writer;
45
46pub use label::*;
47pub use reader::{read_gff, read_gff_from_bytes};
48pub use writer::{write_gff, write_gff_to_vec};
49
50use num_enum::{IntoPrimitive, TryFromPrimitive};
51use thiserror::Error;
52
53use rakata_core::{DecodeTextError, EncodeTextError, ResRef, StrRef, TextEncoding};
54
55use crate::binary::{self, DecodeBinary, EncodeBinary};
56
/// Size in bytes of the fixed GFF V3.2 header at the start of the file.
const GFF_HEADER_SIZE: usize = 56;
/// Size in bytes of one struct-table entry.
const STRUCT_ENTRY_SIZE: usize = 12;
/// Size in bytes of one field-table entry.
const FIELD_ENTRY_SIZE: usize = 12;
/// Fixed width in bytes of one field label in the label table.
const LABEL_SIZE: usize = 16;
/// Version tag of the binary GFF revision supported by KotOR (`V3.2`).
const GFF_VERSION_V32: [u8; 4] = *b"V3.2";
/// Default encoding used for non-localized GFF text values.
const DEFAULT_TEXT_ENCODING: TextEncoding = TextEncoding::Windows1252;
69
70/// In-memory representation of a binary GFF file.
71#[derive(Debug, Clone, PartialEq)]
72pub struct Gff {
73    /// Four-byte file type (`GFF `, `UTC `, `UTI `, ...).
74    pub file_type: [u8; 4],
75    /// Root structure.
76    pub root: GffStruct,
77}
78
79impl Gff {
80    /// Creates a GFF value with the provided file type and root structure.
81    pub fn new(file_type: [u8; 4], root: GffStruct) -> Self {
82        Self { file_type, root }
83    }
84
85    /// Creates a generic `GFF ` container.
86    pub fn generic(root: GffStruct) -> Self {
87        Self {
88            file_type: *b"GFF ",
89            root,
90        }
91    }
92}
93
/// Decodes a [`Gff`] from an in-memory byte slice by delegating to
/// [`read_gff_from_bytes`].
impl DecodeBinary for Gff {
    /// Decoding failures are reported as [`GffBinaryError`].
    type Error = GffBinaryError;

    /// Parses `bytes` as a binary GFF file.
    fn decode_binary(bytes: &[u8]) -> Result<Self, Self::Error> {
        read_gff_from_bytes(bytes)
    }
}
101
/// Encodes a [`Gff`] into a freshly allocated byte vector by delegating to
/// [`write_gff_to_vec`].
impl EncodeBinary for Gff {
    /// Encoding failures are reported as [`GffBinaryError`].
    type Error = GffBinaryError;

    /// Serializes this GFF into its binary representation.
    fn encode_binary(&self) -> Result<Vec<u8>, Self::Error> {
        write_gff_to_vec(self)
    }
}
109
110/// One GFF struct node.
111#[derive(Debug, Clone, PartialEq)]
112pub struct GffStruct {
113    /// Struct ID from the binary table.
114    pub struct_id: i32,
115    /// Ordered fields for this struct.
116    pub fields: Vec<GffField>,
117}
118
119impl GffStruct {
120    /// Creates an empty struct with the given `struct_id`.
121    pub fn new(struct_id: i32) -> Self {
122        Self {
123            struct_id,
124            fields: Vec::new(),
125        }
126    }
127
128    /// Creates a struct with pre-populated fields.
129    pub fn with_fields(struct_id: i32, fields: Vec<GffField>) -> Self {
130        Self { struct_id, fields }
131    }
132
133    /// Appends a new field.
134    pub fn push_field(&mut self, label: impl TryInto<GffLabel>, value: GffValue) {
135        self.fields.push(GffField {
136            label: label
137                .try_into()
138                .unwrap_or_else(|_| panic!("failed to push field with invalid label")),
139            value,
140        });
141    }
142
143    /// Returns the first field value that matches `label`.
144    pub fn field(&self, label: &str) -> Option<&GffValue> {
145        self.fields
146            .iter()
147            .find(|field| field.label == label)
148            .map(|field| &field.value)
149    }
150}
151
/// One labeled GFF field: a label paired with its typed value.
#[derive(Debug, Clone, PartialEq)]
pub struct GffField {
    /// Field label.
    pub label: GffLabel,
    /// Field payload.
    pub value: GffValue,
}
160
/// Runtime value for one GFF field.
///
/// Each variant corresponds to the like-named on-disk field type; the type
/// ids quoted below are the discriminants of the module's `FieldType` enum.
#[derive(Debug, Clone, PartialEq)]
pub enum GffValue {
    /// Unsigned 8-bit integer (type id 0).
    UInt8(u8),
    /// Signed 8-bit integer (type id 1).
    Int8(i8),
    /// Unsigned 16-bit integer (type id 2).
    UInt16(u16),
    /// Signed 16-bit integer (type id 3).
    Int16(i16),
    /// Unsigned 32-bit integer (type id 4).
    UInt32(u32),
    /// Signed 32-bit integer (type id 5).
    Int32(i32),
    /// Unsigned 64-bit integer (type id 6).
    UInt64(u64),
    /// Signed 64-bit integer (type id 7).
    Int64(i64),
    /// 32-bit floating point (type id 8).
    Single(f32),
    /// 64-bit floating point (type id 9).
    Double(f64),
    /// CExoString (type id 10).
    String(String),
    /// CResRef canonicalized to the typed resource reference (type id 11).
    ResRef(ResRef),
    /// CExoLocString payload (type id 12).
    LocalizedString(GffLocalizedString),
    /// Arbitrary binary blob (type id 13).
    Binary(Vec<u8>),
    /// Nested struct (type id 14).
    Struct(Box<GffStruct>),
    /// Struct list (type id 15).
    List(Vec<GffStruct>),
    /// Vector4 / orientation (type id 16).
    Vector4([f32; 4]),
    /// Vector3 / position (type id 17).
    Vector3([f32; 3]),
    /// StrRef extension field (type id 18).
    StrRef(StrRef),
}
203
204impl GffValue {
205    /// Test/fixture helper: constructs a [`GffValue::ResRef`] from a string
206    /// literal, panicking on invalid input.
207    ///
208    /// Production code should construct `GffValue::ResRef(resref)` directly
209    /// with a validated [`ResRef`]. This helper exists so fixture generators
210    /// and unit tests can avoid boilerplate when the input is a known-valid
211    /// literal.
212    #[doc(hidden)]
213    pub fn resref_lit(value: &str) -> Self {
214        GffValue::ResRef(ResRef::new(value).expect("valid resref literal"))
215    }
216}
217
218/// CExoLocString payload.
219#[derive(Debug, Clone, PartialEq, Eq)]
220pub struct GffLocalizedString {
221    /// TLK string reference (`StrRef::invalid()` means use substrings).
222    pub string_ref: StrRef,
223    /// Embedded localized substrings.
224    pub substrings: Vec<GffLocalizedSubstring>,
225}
226
227impl GffLocalizedString {
228    /// Creates an empty localized string.
229    pub fn new(string_ref: impl Into<StrRef>) -> Self {
230        Self {
231            string_ref: string_ref.into(),
232            substrings: Vec::new(),
233        }
234    }
235}
236
/// One localized substring entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GffLocalizedSubstring {
    /// Packed string ID (`language_id * 2 + gender`).
    pub string_id: u32,
    /// Decoded text payload.
    pub text: String,
}

impl GffLocalizedSubstring {
    /// Extracts the language ID, i.e. the packed ID with its gender bit
    /// removed (`string_id / 2`).
    pub fn language_id(&self) -> u32 {
        self.string_id >> 1
    }

    /// Reports whether the gender bit is set, which marks a feminine entry
    /// (`string_id % 2 == 1`).
    pub fn is_feminine(&self) -> bool {
        self.string_id & 1 == 1
    }
}
257
/// Errors produced while parsing or writing binary GFF data.
///
/// The JSON helpers in this module report their failures through this same
/// type, using [`GffBinaryError::InvalidData`] and
/// [`GffBinaryError::InvalidHeader`].
#[derive(Debug, Error)]
pub enum GffBinaryError {
    /// I/O read/write failure; the underlying error is forwarded unchanged.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// Header/body layout is invalid or truncated.
    ///
    /// Also produced when a `binary::BinaryLayoutError` is converted into
    /// this error type.
    #[error("invalid GFF header: {0}")]
    InvalidHeader(String),
    /// GFF version is unsupported; carries the four version bytes found.
    #[error("invalid GFF version: {0:?}")]
    InvalidVersion([u8; 4]),
    /// Encountered an unknown field type ID; carries the offending ID.
    #[error("invalid GFF field type id: {0}")]
    InvalidFieldType(u32),
    /// In-memory data is not valid for binary serialization.
    #[error("invalid GFF data: {0}")]
    InvalidData(String),
    /// Value cannot fit the target on-disk width; carries the name of the
    /// quantity being written.
    #[error("value overflow while writing `{0}`")]
    ValueOverflow(&'static str),
    /// Label exceeds 16 bytes after encoding.
    #[error("label `{label}` encoded length {len} exceeds maximum {max}")]
    LabelTooLong {
        /// Label text.
        label: String,
        /// Encoded byte length.
        len: usize,
        /// Maximum allowed byte length.
        max: usize,
    },
    /// Text cannot be represented in the target encoding.
    #[error("GFF text encoding failed for {context}: {source}")]
    TextEncoding {
        /// Context path for error reporting.
        context: String,
        /// Source encoding error.
        #[source]
        source: EncodeTextError,
    },
    /// Text bytes cannot be decoded losslessly.
    #[error("GFF text decoding failed for {context}: {source}")]
    TextDecoding {
        /// Context path for error reporting.
        context: String,
        /// Source decoding error.
        #[source]
        source: DecodeTextError,
    },
    /// Language ID maps to an unsupported encoding; carries the language ID.
    #[error("unsupported language id {0} for localized string encoding")]
    UnsupportedLanguageEncoding(u32),
}
311
312impl From<binary::BinaryLayoutError> for GffBinaryError {
313    fn from(error: binary::BinaryLayoutError) -> Self {
314        Self::InvalidHeader(error.to_string())
315    }
316}
317
/// Numeric field type IDs, one per [`GffValue`] variant.
///
/// The `num_enum` derives provide conversion to `u32` (`IntoPrimitive`) and
/// fallible conversion from `u32` (`TryFromPrimitive`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
#[repr(u32)]
pub(super) enum FieldType {
    /// Unsigned 8-bit integer.
    UInt8 = 0,
    /// Signed 8-bit integer.
    Int8 = 1,
    /// Unsigned 16-bit integer.
    UInt16 = 2,
    /// Signed 16-bit integer.
    Int16 = 3,
    /// Unsigned 32-bit integer.
    UInt32 = 4,
    /// Signed 32-bit integer.
    Int32 = 5,
    /// Unsigned 64-bit integer.
    UInt64 = 6,
    /// Signed 64-bit integer.
    Int64 = 7,
    /// 32-bit floating point.
    Single = 8,
    /// 64-bit floating point.
    Double = 9,
    /// CExoString.
    String = 10,
    /// CResRef.
    ResRef = 11,
    /// CExoLocString.
    LocalizedString = 12,
    /// Arbitrary binary blob.
    Binary = 13,
    /// Nested struct.
    Struct = 14,
    /// Struct list.
    List = 15,
    /// Vector4 / orientation.
    Vector4 = 16,
    /// Vector3 / position.
    Vector3 = 17,
    /// StrRef extension field.
    StrRef = 18,
}
341
342pub(super) fn to_u32(value: usize, name: &'static str) -> Result<u32, GffBinaryError> {
343    u32::try_from(value).map_err(|_| GffBinaryError::ValueOverflow(name))
344}
345
346pub(super) fn to_usize(value: u32, name: &'static str) -> Result<usize, GffBinaryError> {
347    binary::checked_to_usize(value, name).map_err(|_| {
348        GffBinaryError::InvalidData(format!("{name} does not fit target platform usize"))
349    })
350}
351
352//
353// Serde Support (JSON)
354//
355
356#[cfg(feature = "serde")]
357/// JSON serialization support for GFF.
358pub mod serde_json_fmt {
359    use super::*;
360    use serde::{Deserialize, Serialize};
361    use serde_json::{from_slice, from_str, to_string_pretty, to_vec};
362    use std::collections::BTreeMap;
363
364    /// Serializes a GFF to JSON.
365    pub fn write_gff_to_json(gff: &Gff) -> Result<String, GffBinaryError> {
366        let dto = GffDto::from(gff);
367        to_string_pretty(&dto).map_err(|e| GffBinaryError::InvalidData(e.to_string()))
368    }
369
370    /// Serializes a GFF to JSON bytes.
371    pub fn write_gff_to_json_vec(gff: &Gff) -> Result<Vec<u8>, GffBinaryError> {
372        let dto = GffDto::from(gff);
373        to_vec(&dto).map_err(|e| GffBinaryError::InvalidData(e.to_string()))
374    }
375
376    /// Deserializes a GFF from JSON.
377    pub fn read_gff_from_json(json: &str) -> Result<Gff, GffBinaryError> {
378        let dto: GffDto = from_str(json).map_err(|e| GffBinaryError::InvalidData(e.to_string()))?;
379        Gff::try_from(dto)
380    }
381
382    /// Deserializes a GFF from JSON bytes.
383    pub fn read_gff_from_json_bytes(bytes: &[u8]) -> Result<Gff, GffBinaryError> {
384        let dto: GffDto =
385            from_slice(bytes).map_err(|e| GffBinaryError::InvalidData(e.to_string()))?;
386        Gff::try_from(dto)
387    }
388
    /// Serializable DTO for the root GFF file.
    ///
    /// Mirrors [`Gff`], with the four-byte file type carried as text.
    #[derive(Serialize, Deserialize)]
    pub struct GffDto {
        /// File type signature (e.g. "UTC ").
        pub file_type: String,
        /// Root struct data.
        pub root: GffStructDto,
    }
397
    /// Serializable DTO for a GFF struct.
    #[derive(Serialize, Deserialize)]
    pub struct GffStructDto {
        /// Struct ID (usually -1 for root, or specific ID for list items).
        pub struct_id: i32,
        /// Field map keyed by label, so entries are sorted by label and each
        /// label appears at most once.
        ///
        /// Omitted from the output when empty and treated as empty when
        /// missing from the input.
        #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
        pub fields: BTreeMap<String, GffValueDto>,
    }
407
    /// Serializable DTO for a GFF field value.
    ///
    /// Serialized in adjacently tagged form: the variant name goes under the
    /// `type` key and the payload under the `value` key.
    #[derive(Serialize, Deserialize)]
    #[serde(tag = "type", content = "value")]
    pub enum GffValueDto {
        /// Unsigned 8-bit integer.
        UInt8(u8),
        /// Signed 8-bit integer.
        Int8(i8),
        /// Unsigned 16-bit integer.
        UInt16(u16),
        /// Signed 16-bit integer.
        Int16(i16),
        /// Unsigned 32-bit integer.
        UInt32(u32),
        /// Signed 32-bit integer.
        Int32(i32),
        /// Unsigned 64-bit integer.
        UInt64(u64),
        /// Signed 64-bit integer.
        Int64(i64),
        /// 32-bit float.
        Single(f32),
        /// 64-bit float.
        Double(f64),
        /// String value.
        String(String),
        /// Resource reference string; validated when converted back into a
        /// [`GffValue`].
        ResRef(String),
        /// Localized string object.
        LocalizedString(GffLocalizedStringDto),
        /// Binary blob (serialized as a hex string, two uppercase digits per
        /// byte).
        #[serde(with = "hex_bytes")]
        Binary(Vec<u8>),
        /// Nested struct.
        Struct(Box<GffStructDto>),
        /// List of structs.
        List(Vec<GffStructDto>),
        /// 4-component vector.
        Vector4([f32; 4]),
        /// 3-component vector.
        Vector3([f32; 3]),
        /// String reference ID (raw value).
        StrRef(i32),
    }
452
    /// Serializable DTO for a localized string.
    #[derive(Serialize, Deserialize)]
    pub struct GffLocalizedStringDto {
        /// Reference into `dialog.tlk`.
        pub str_ref: i32,
        /// Map of packed string ID (`language_id * 2 + gender`, the same value
        /// as [`GffLocalizedSubstring::string_id`]) to localized text.
        ///
        /// Omitted from the output when empty and treated as empty when
        /// missing from the input.
        #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
        pub substrings: BTreeMap<u32, String>,
    }
462
463    impl From<&Gff> for GffDto {
464        fn from(gff: &Gff) -> Self {
465            let file_type = rakata_core::text::decode_text(
466                &gff.file_type,
467                rakata_core::text::TextEncoding::Windows1252,
468            );
469            Self {
470                file_type,
471                root: GffStructDto::from(&gff.root),
472            }
473        }
474    }
475
476    impl TryFrom<GffDto> for Gff {
477        type Error = GffBinaryError;
478
479        fn try_from(dto: GffDto) -> Result<Self, Self::Error> {
480            let mut file_type = [0u8; 4];
481            let bytes = dto.file_type.as_bytes();
482            if bytes.len() > 4 {
483                return Err(GffBinaryError::InvalidHeader("file_type too long".into()));
484            }
485            file_type[..bytes.len()].copy_from_slice(bytes);
486
487            Ok(Self {
488                file_type,
489                root: GffStruct::try_from(dto.root)?,
490            })
491        }
492    }
493
494    impl From<&GffStruct> for GffStructDto {
495        fn from(s: &GffStruct) -> Self {
496            let mut fields = BTreeMap::new();
497            for field in &s.fields {
498                fields.insert(field.label.to_string(), GffValueDto::from(&field.value));
499            }
500            Self {
501                struct_id: s.struct_id,
502                fields,
503            }
504        }
505    }
506
507    impl TryFrom<GffStructDto> for GffStruct {
508        type Error = GffBinaryError;
509
510        fn try_from(dto: GffStructDto) -> Result<Self, Self::Error> {
511            let mut fields = Vec::with_capacity(dto.fields.len());
512            for (label, value_dto) in dto.fields {
513                fields.push(GffField {
514                    label: label.try_into().map_err(|_| {
515                        GffBinaryError::InvalidData("Invalid GFF label in JSON".into())
516                    })?,
517                    value: GffValue::try_from(value_dto)?,
518                });
519            }
520            Ok(Self {
521                struct_id: dto.struct_id,
522                fields,
523            })
524        }
525    }
526
527    impl From<&GffValue> for GffValueDto {
528        fn from(v: &GffValue) -> Self {
529            match v {
530                GffValue::UInt8(x) => Self::UInt8(*x),
531                GffValue::Int8(x) => Self::Int8(*x),
532                GffValue::UInt16(x) => Self::UInt16(*x),
533                GffValue::Int16(x) => Self::Int16(*x),
534                GffValue::UInt32(x) => Self::UInt32(*x),
535                GffValue::Int32(x) => Self::Int32(*x),
536                GffValue::UInt64(x) => Self::UInt64(*x),
537                GffValue::Int64(x) => Self::Int64(*x),
538                GffValue::Single(x) => Self::Single(*x),
539                GffValue::Double(x) => Self::Double(*x),
540                GffValue::String(x) => Self::String(x.clone()),
541                GffValue::ResRef(x) => Self::ResRef(x.as_str().to_owned()),
542                GffValue::LocalizedString(x) => {
543                    Self::LocalizedString(GffLocalizedStringDto::from(x))
544                }
545                GffValue::Binary(x) => Self::Binary(x.clone()),
546                GffValue::Struct(x) => Self::Struct(Box::new(GffStructDto::from(x.as_ref()))),
547                GffValue::List(x) => Self::List(x.iter().map(GffStructDto::from).collect()),
548                GffValue::Vector4(x) => Self::Vector4(*x),
549                GffValue::Vector3(x) => Self::Vector3(*x),
550                GffValue::StrRef(x) => Self::StrRef(x.raw()),
551            }
552        }
553    }
554
555    impl TryFrom<GffValueDto> for GffValue {
556        type Error = GffBinaryError;
557
558        fn try_from(dto: GffValueDto) -> Result<Self, Self::Error> {
559            Ok(match dto {
560                GffValueDto::UInt8(x) => Self::UInt8(x),
561                GffValueDto::Int8(x) => Self::Int8(x),
562                GffValueDto::UInt16(x) => Self::UInt16(x),
563                GffValueDto::Int16(x) => Self::Int16(x),
564                GffValueDto::UInt32(x) => Self::UInt32(x),
565                GffValueDto::Int32(x) => Self::Int32(x),
566                GffValueDto::UInt64(x) => Self::UInt64(x),
567                GffValueDto::Int64(x) => Self::Int64(x),
568                GffValueDto::Single(x) => Self::Single(x),
569                GffValueDto::Double(x) => Self::Double(x),
570                GffValueDto::String(x) => Self::String(x),
571                GffValueDto::ResRef(x) => Self::ResRef(
572                    ResRef::new(&x)
573                        .map_err(|e| GffBinaryError::InvalidData(format!("resref `{x}`: {e}")))?,
574                ),
575                GffValueDto::LocalizedString(x) => {
576                    Self::LocalizedString(GffLocalizedString::from(x))
577                }
578                GffValueDto::Binary(x) => Self::Binary(x),
579                GffValueDto::Struct(x) => Self::Struct(Box::new(GffStruct::try_from(*x)?)),
580                GffValueDto::List(x) => {
581                    let mut list = Vec::with_capacity(x.len());
582                    for item in x {
583                        list.push(GffStruct::try_from(item)?);
584                    }
585                    Self::List(list)
586                }
587                GffValueDto::Vector4(x) => Self::Vector4(x),
588                GffValueDto::Vector3(x) => Self::Vector3(x),
589                GffValueDto::StrRef(x) => Self::StrRef(StrRef::from_raw(x)),
590            })
591        }
592    }
593
594    impl From<&GffLocalizedString> for GffLocalizedStringDto {
595        fn from(s: &GffLocalizedString) -> Self {
596            let mut substrings = BTreeMap::new();
597            for sub in &s.substrings {
598                substrings.insert(sub.string_id, sub.text.clone());
599            }
600            Self {
601                str_ref: s.string_ref.raw(),
602                substrings,
603            }
604        }
605    }
606
607    impl From<GffLocalizedStringDto> for GffLocalizedString {
608        fn from(dto: GffLocalizedStringDto) -> Self {
609            let mut substrings = Vec::with_capacity(dto.substrings.len());
610            for (string_id, text) in dto.substrings {
611                substrings.push(GffLocalizedSubstring { string_id, text });
612            }
613            Self {
614                string_ref: StrRef::from_raw(dto.str_ref),
615                substrings,
616            }
617        }
618    }
619
620    mod hex_bytes {
621        use serde::{Deserialize, Deserializer, Serializer};
622
623        pub fn serialize<S>(bytes: &[u8], serializer: S) -> Result<S::Ok, S::Error>
624        where
625            S: Serializer,
626        {
627            let hex = hex_encode(bytes);
628            serializer.serialize_str(&hex)
629        }
630
631        pub fn deserialize<'de, D>(deserializer: D) -> Result<Vec<u8>, D::Error>
632        where
633            D: Deserializer<'de>,
634        {
635            let s = String::deserialize(deserializer)?;
636            hex_decode(&s).map_err(serde::de::Error::custom)
637        }
638
639        fn hex_encode(bytes: &[u8]) -> String {
640            use std::fmt::Write;
641            let mut s = String::with_capacity(bytes.len() * 2);
642            for b in bytes {
643                write!(&mut s, "{b:02X}").expect("writing to a String cannot fail");
644            }
645            s
646        }
647
648        fn hex_decode(s: &str) -> Result<Vec<u8>, String> {
649            if !s.len().is_multiple_of(2) {
650                return Err("odd length hex string".into());
651            }
652            let mut bytes = Vec::with_capacity(s.len() / 2);
653            for i in (0..s.len()).step_by(2) {
654                let byte_str = &s[i..i + 2];
655                let byte = u8::from_str_radix(byte_str, 16)
656                    .map_err(|e| format!("invalid hex byte {}: {}", byte_str, e))?;
657                bytes.push(byte);
658            }
659            Ok(bytes)
660        }
661    }
662}
663
664#[cfg(feature = "serde")]
665pub use serde_json_fmt::{
666    read_gff_from_json, read_gff_from_json_bytes, write_gff_to_json, write_gff_to_json_vec,
667};