rakata_formats/gff/
reader.rs

1//! GFF V3.2 binary reader.
2
3use std::io::Read;
4
5use rakata_core::{decode_text_strict, text_encoding_for_language, ResRef, StrRef};
6
7use super::{
8    binary, to_usize, FieldType, Gff, GffBinaryError, GffField, GffLocalizedString,
9    GffLocalizedSubstring, GffStruct, GffValue, DEFAULT_TEXT_ENCODING, FIELD_ENTRY_SIZE,
10    GFF_HEADER_SIZE, GFF_VERSION_V32, LABEL_SIZE, STRUCT_ENTRY_SIZE,
11};
12
13/// Reads a binary GFF file from a reader.
14///
15/// The stream is consumed from its current position.
16#[cfg_attr(
17    feature = "tracing",
18    tracing::instrument(level = "debug", skip(reader))
19)]
20pub fn read_gff<R: Read>(reader: &mut R) -> Result<Gff, GffBinaryError> {
21    let mut bytes = Vec::new();
22    reader.read_to_end(&mut bytes)?;
23    crate::trace_debug!(bytes_len = bytes.len(), "read gff bytes from reader");
24    read_gff_from_bytes(&bytes)
25}
26
27/// Reads a binary GFF file from bytes.
28#[cfg_attr(
29    feature = "tracing",
30    tracing::instrument(level = "debug", skip(bytes), fields(bytes_len = bytes.len()))
31)]
32pub fn read_gff_from_bytes(bytes: &[u8]) -> Result<Gff, GffBinaryError> {
33    let header = parse_header(bytes)?;
34    let labels = read_labels(bytes, &header)?;
35    let parser = GffParser {
36        bytes,
37        header,
38        labels,
39    };
40    let root = parser.read_struct(0, 0)?;
41    crate::trace_debug!(
42        file_type = ?parser.header.file_type,
43        struct_count = parser.header.struct_count,
44        field_count = parser.header.field_count,
45        label_count = parser.header.label_count,
46        root_field_count = root.fields.len(),
47        "parsed gff from bytes"
48    );
49    Ok(Gff {
50        file_type: parser.header.file_type,
51        root,
52    })
53}
54
/// Header values of a GFF file as produced by `parse_header`.
///
/// Every numeric member is read as a little `u32` slot from the header and
/// converted to `usize`. Offsets are positions inside the file buffer; each
/// offset/size pair has been bounds-checked against the buffer length.
#[derive(Debug, Clone, Copy)]
struct GffHeader {
    /// Four-byte file type tag taken from the start of the file.
    file_type: [u8; 4],
    /// Start of the struct table.
    struct_offset: usize,
    /// Number of struct entries (each `STRUCT_ENTRY_SIZE` bytes).
    struct_count: usize,
    /// Start of the field table.
    field_offset: usize,
    /// Number of field entries (each `FIELD_ENTRY_SIZE` bytes).
    field_count: usize,
    /// Start of the label table.
    label_offset: usize,
    /// Number of labels (each `LABEL_SIZE` bytes).
    label_count: usize,
    /// Start of the field-data block.
    field_data_offset: usize,
    /// Size of the field-data block; used as a byte length by the readers.
    field_data_count: usize,
    /// Start of the field-indices block.
    field_indices_offset: usize,
    /// Size of the field-indices block; used as a byte length by the readers.
    field_indices_count: usize,
    /// Start of the list-indices block.
    list_indices_offset: usize,
    /// Size of the list-indices block; used as a byte length by the readers.
    list_indices_count: usize,
}
71
/// Read-only parsing context shared by the struct, field and payload readers.
struct GffParser<'a> {
    /// The complete file contents being parsed.
    bytes: &'a [u8],
    /// Validated header describing where each table lives in `bytes`.
    header: GffHeader,
    /// Label table decoded up front; fields refer to it by index.
    labels: Vec<super::GffLabel>,
}
77
78impl<'a> GffParser<'a> {
79    fn read_struct(&self, struct_index: usize, depth: usize) -> Result<GffStruct, GffBinaryError> {
80        if depth > self.header.struct_count {
81            return Err(GffBinaryError::InvalidData(
82                "detected nested struct cycle while reading".into(),
83            ));
84        }
85        if struct_index >= self.header.struct_count {
86            return Err(GffBinaryError::InvalidData(format!(
87                "struct index {struct_index} out of range"
88            )));
89        }
90
91        let base = self
92            .header
93            .struct_offset
94            .checked_add(
95                struct_index
96                    .checked_mul(STRUCT_ENTRY_SIZE)
97                    .ok_or_else(|| GffBinaryError::InvalidHeader("struct index overflow".into()))?,
98            )
99            .ok_or_else(|| GffBinaryError::InvalidHeader("struct base overflow".into()))?;
100        let struct_id = read_i32(self.bytes, base)?;
101        let data_or_offset = binary::read_u32(self.bytes, base + 4)?;
102        let field_count = usize::try_from(binary::read_u32(self.bytes, base + 8)?)
103            .map_err(|_| GffBinaryError::InvalidData("field count does not fit usize".into()))?;
104
105        let mut fields = Vec::with_capacity(field_count);
106        match field_count {
107            0 => {}
108            1 => {
109                let field_index = usize::try_from(data_or_offset).map_err(|_| {
110                    GffBinaryError::InvalidData("single field index does not fit usize".into())
111                })?;
112                fields.push(self.read_field(field_index, depth + 1)?);
113            }
114            _ => {
115                let indices_rel_offset = usize::try_from(data_or_offset).map_err(|_| {
116                    GffBinaryError::InvalidData("field indices offset does not fit usize".into())
117                })?;
118                let indices_size = field_count.checked_mul(4).ok_or_else(|| {
119                    GffBinaryError::InvalidData("field indices size overflow".into())
120                })?;
121                if indices_rel_offset
122                    .checked_add(indices_size)
123                    .is_none_or(|end| end > self.header.field_indices_count)
124                {
125                    return Err(GffBinaryError::InvalidData(format!(
126                        "field indices block out of range for struct {struct_index}"
127                    )));
128                }
129                let indices_base = self
130                    .header
131                    .field_indices_offset
132                    .checked_add(indices_rel_offset)
133                    .ok_or_else(|| {
134                        GffBinaryError::InvalidData("field indices absolute offset overflow".into())
135                    })?;
136                for i in 0..field_count {
137                    let field_index =
138                        usize::try_from(binary::read_u32(self.bytes, indices_base + i * 4)?)
139                            .map_err(|_| {
140                                GffBinaryError::InvalidData(
141                                    "field index from field-indices table does not fit usize"
142                                        .into(),
143                                )
144                            })?;
145                    fields.push(self.read_field(field_index, depth + 1)?);
146                }
147            }
148        }
149
150        Ok(GffStruct { struct_id, fields })
151    }
152
153    fn read_field(&self, field_index: usize, depth: usize) -> Result<GffField, GffBinaryError> {
154        if field_index >= self.header.field_count {
155            return Err(GffBinaryError::InvalidData(format!(
156                "field index {field_index} out of range"
157            )));
158        }
159        let base = self
160            .header
161            .field_offset
162            .checked_add(
163                field_index
164                    .checked_mul(FIELD_ENTRY_SIZE)
165                    .ok_or_else(|| GffBinaryError::InvalidHeader("field index overflow".into()))?,
166            )
167            .ok_or_else(|| GffBinaryError::InvalidHeader("field base overflow".into()))?;
168
169        let field_type_raw = binary::read_u32(self.bytes, base)?;
170        let field_type = FieldType::try_from(field_type_raw)
171            .map_err(|_| GffBinaryError::InvalidFieldType(field_type_raw))?;
172        let label_index = usize::try_from(binary::read_u32(self.bytes, base + 4)?)
173            .map_err(|_| GffBinaryError::InvalidData("label index does not fit usize".into()))?;
174        let data_or_offset = binary::read_u32(self.bytes, base + 8)?;
175        let label = *self.labels.get(label_index).ok_or_else(|| {
176            GffBinaryError::InvalidData(format!("label index {label_index} out of range"))
177        })?;
178
179        let value = match field_type {
180            FieldType::UInt8 => {
181                GffValue::UInt8(u8::try_from(data_or_offset & 0xFF).expect("masked to 8 bits"))
182            }
183            FieldType::Int8 => {
184                GffValue::Int8(i8::from_le_bytes([
185                    u8::try_from(data_or_offset & 0xFF).expect("masked to 8 bits")
186                ]))
187            }
188            FieldType::UInt16 => {
189                GffValue::UInt16(u16::try_from(data_or_offset & 0xFFFF).expect("masked to 16 bits"))
190            }
191            FieldType::Int16 => GffValue::Int16(i16::from_le_bytes(
192                u16::try_from(data_or_offset & 0xFFFF)
193                    .expect("masked to 16 bits")
194                    .to_le_bytes(),
195            )),
196            FieldType::UInt32 => GffValue::UInt32(data_or_offset),
197            FieldType::Int32 => GffValue::Int32(i32::from_le_bytes(data_or_offset.to_le_bytes())),
198            FieldType::Single => GffValue::Single(f32::from_bits(data_or_offset)),
199            FieldType::UInt64 => GffValue::UInt64(read_u64_at_field_data(self, data_or_offset)?),
200            FieldType::Int64 => GffValue::Int64(i64::from_le_bytes(
201                read_u64_at_field_data(self, data_or_offset)?.to_le_bytes(),
202            )),
203            FieldType::Double => GffValue::Double(f64::from_bits(read_u64_at_field_data(
204                self,
205                data_or_offset,
206            )?)),
207            FieldType::String => GffValue::String(read_sized_string(
208                self,
209                data_or_offset,
210                format!("field[{field_index}] string"),
211            )?),
212            FieldType::ResRef => {
213                let raw = read_sized_string_u8(
214                    self,
215                    data_or_offset,
216                    format!("field[{field_index}] resref"),
217                )?;
218                let resref = ResRef::new(&raw).map_err(|err| {
219                    GffBinaryError::InvalidData(format!(
220                        "field[{field_index}] resref `{raw}`: {err}"
221                    ))
222                })?;
223                GffValue::ResRef(resref)
224            }
225            FieldType::LocalizedString => {
226                GffValue::LocalizedString(read_localized_string(self, data_or_offset, field_index)?)
227            }
228            FieldType::Binary => GffValue::Binary(read_binary_blob(self, data_or_offset)?),
229            FieldType::Struct => {
230                let struct_index = usize::try_from(data_or_offset).map_err(|_| {
231                    GffBinaryError::InvalidData("nested struct index does not fit usize".into())
232                })?;
233                GffValue::Struct(Box::new(self.read_struct(struct_index, depth + 1)?))
234            }
235            FieldType::List => GffValue::List(read_struct_list(self, data_or_offset, depth + 1)?),
236            FieldType::Vector4 => GffValue::Vector4(read_vector4(self, data_or_offset)?),
237            FieldType::Vector3 => GffValue::Vector3(read_vector3(self, data_or_offset)?),
238            FieldType::StrRef => GffValue::StrRef(StrRef::from_raw(i32::from_le_bytes(
239                data_or_offset.to_le_bytes(),
240            ))),
241        };
242
243        Ok(GffField { label, value })
244    }
245}
246
247fn parse_header(bytes: &[u8]) -> Result<GffHeader, GffBinaryError> {
248    if bytes.len() < GFF_HEADER_SIZE {
249        return Err(GffBinaryError::InvalidHeader(
250            "file smaller than GFF header".into(),
251        ));
252    }
253    let file_type = binary::read_fourcc(bytes, 0)?;
254    let version = binary::read_fourcc(bytes, 4)?;
255    binary::expect_fourcc(version, GFF_VERSION_V32).map_err(GffBinaryError::InvalidVersion)?;
256
257    let struct_offset = to_usize(binary::read_u32(bytes, 8)?, "struct_offset")?;
258    let struct_count = to_usize(binary::read_u32(bytes, 12)?, "struct_count")?;
259    let field_offset = to_usize(binary::read_u32(bytes, 16)?, "field_offset")?;
260    let field_count = to_usize(binary::read_u32(bytes, 20)?, "field_count")?;
261    let label_offset = to_usize(binary::read_u32(bytes, 24)?, "label_offset")?;
262    let label_count = to_usize(binary::read_u32(bytes, 28)?, "label_count")?;
263    let field_data_offset = to_usize(binary::read_u32(bytes, 32)?, "field_data_offset")?;
264    let field_data_count = to_usize(binary::read_u32(bytes, 36)?, "field_data_count")?;
265    let field_indices_offset = to_usize(binary::read_u32(bytes, 40)?, "field_indices_offset")?;
266    let field_indices_count = to_usize(binary::read_u32(bytes, 44)?, "field_indices_count")?;
267    let list_indices_offset = to_usize(binary::read_u32(bytes, 48)?, "list_indices_offset")?;
268    let list_indices_count = to_usize(binary::read_u32(bytes, 52)?, "list_indices_count")?;
269
270    check_table_bounds(
271        bytes.len(),
272        struct_offset,
273        struct_count
274            .checked_mul(STRUCT_ENTRY_SIZE)
275            .ok_or(GffBinaryError::InvalidHeader(
276                "struct table size overflow".into(),
277            ))?,
278        "struct table",
279    )?;
280    check_table_bounds(
281        bytes.len(),
282        field_offset,
283        field_count
284            .checked_mul(FIELD_ENTRY_SIZE)
285            .ok_or(GffBinaryError::InvalidHeader(
286                "field table size overflow".into(),
287            ))?,
288        "field table",
289    )?;
290    check_table_bounds(
291        bytes.len(),
292        label_offset,
293        label_count
294            .checked_mul(LABEL_SIZE)
295            .ok_or(GffBinaryError::InvalidHeader(
296                "label table size overflow".into(),
297            ))?,
298        "label table",
299    )?;
300    check_table_bounds(
301        bytes.len(),
302        field_data_offset,
303        field_data_count,
304        "field data",
305    )?;
306    check_table_bounds(
307        bytes.len(),
308        field_indices_offset,
309        field_indices_count,
310        "field indices",
311    )?;
312    check_table_bounds(
313        bytes.len(),
314        list_indices_offset,
315        list_indices_count,
316        "list indices",
317    )?;
318
319    if struct_count == 0 {
320        return Err(GffBinaryError::InvalidHeader(
321            "struct table is empty (missing root struct)".into(),
322        ));
323    }
324
325    Ok(GffHeader {
326        file_type,
327        struct_offset,
328        struct_count,
329        field_offset,
330        field_count,
331        label_offset,
332        label_count,
333        field_data_offset,
334        field_data_count,
335        field_indices_offset,
336        field_indices_count,
337        list_indices_offset,
338        list_indices_count,
339    })
340}
341
342fn read_labels(bytes: &[u8], header: &GffHeader) -> Result<Vec<super::GffLabel>, GffBinaryError> {
343    let mut labels = Vec::with_capacity(header.label_count);
344    for label_index in 0..header.label_count {
345        let offset = header
346            .label_offset
347            .checked_add(
348                label_index
349                    .checked_mul(LABEL_SIZE)
350                    .ok_or_else(|| GffBinaryError::InvalidHeader("label offset overflow".into()))?,
351            )
352            .ok_or_else(|| GffBinaryError::InvalidHeader("label base overflow".into()))?;
353        let raw = bytes
354            .get(offset..offset + LABEL_SIZE)
355            .ok_or_else(|| GffBinaryError::InvalidHeader("label slice out of range".into()))?;
356        let end = raw.iter().position(|byte| *byte == 0).unwrap_or(LABEL_SIZE);
357        let label = decode_text_strict(&raw[..end], DEFAULT_TEXT_ENCODING).map_err(|source| {
358            GffBinaryError::TextDecoding {
359                context: format!("label[{label_index}]"),
360                source,
361            }
362        })?;
363        let gff_label = super::GffLabel::new(&label).map_err(|err| {
364            GffBinaryError::InvalidData(format!("label[{label_index}] `{label}` is invalid: {err}"))
365        })?;
366        labels.push(gff_label);
367    }
368    Ok(labels)
369}
370
371fn read_u64_at_field_data(parser: &GffParser<'_>, offset: u32) -> Result<u64, GffBinaryError> {
372    let offset = to_usize(offset, "field_data_offset")?;
373    let absolute = parser
374        .header
375        .field_data_offset
376        .checked_add(offset)
377        .ok_or_else(|| GffBinaryError::InvalidData("field data absolute offset overflow".into()))?;
378    let end_rel = offset
379        .checked_add(8)
380        .ok_or_else(|| GffBinaryError::InvalidData("field data u64 end overflow".into()))?;
381    if end_rel > parser.header.field_data_count {
382        return Err(GffBinaryError::InvalidData(
383            "field data u64 read out of range".into(),
384        ));
385    }
386    Ok(binary::read_u64(parser.bytes, absolute)?)
387}
388
389fn read_sized_string(
390    parser: &GffParser<'_>,
391    offset: u32,
392    context: String,
393) -> Result<String, GffBinaryError> {
394    let offset = to_usize(offset, "field_data_offset")?;
395    let base = parser
396        .header
397        .field_data_offset
398        .checked_add(offset)
399        .ok_or_else(|| GffBinaryError::InvalidData("string base overflow".into()))?;
400    let len = to_usize(binary::read_u32(parser.bytes, base)?, "string_length")?;
401    let data_rel_end = offset
402        .checked_add(4)
403        .and_then(|v| v.checked_add(len))
404        .ok_or_else(|| GffBinaryError::InvalidData("string end overflow".into()))?;
405    if data_rel_end > parser.header.field_data_count {
406        return Err(GffBinaryError::InvalidData(
407            "string payload exceeds field data bounds".into(),
408        ));
409    }
410    let data_start = base + 4;
411    let raw = parser
412        .bytes
413        .get(data_start..data_start + len)
414        .ok_or_else(|| GffBinaryError::InvalidData("string bytes out of range".into()))?;
415    decode_text_strict(raw, DEFAULT_TEXT_ENCODING)
416        .map_err(|source| GffBinaryError::TextDecoding { context, source })
417}
418
419fn read_sized_string_u8(
420    parser: &GffParser<'_>,
421    offset: u32,
422    context: String,
423) -> Result<String, GffBinaryError> {
424    let offset = to_usize(offset, "field_data_offset")?;
425    let base = parser
426        .header
427        .field_data_offset
428        .checked_add(offset)
429        .ok_or_else(|| GffBinaryError::InvalidData("resref base overflow".into()))?;
430    let len =
431        usize::from(*parser.bytes.get(base).ok_or_else(|| {
432            GffBinaryError::InvalidData("resref length byte out of range".into())
433        })?);
434    let data_rel_end = offset
435        .checked_add(1)
436        .and_then(|v| v.checked_add(len))
437        .ok_or_else(|| GffBinaryError::InvalidData("resref end overflow".into()))?;
438    if data_rel_end > parser.header.field_data_count {
439        return Err(GffBinaryError::InvalidData(
440            "resref payload exceeds field data bounds".into(),
441        ));
442    }
443    let data_start = base + 1;
444    let raw = parser
445        .bytes
446        .get(data_start..data_start + len)
447        .ok_or_else(|| GffBinaryError::InvalidData("resref bytes out of range".into()))?;
448    decode_text_strict(raw, DEFAULT_TEXT_ENCODING)
449        .map_err(|source| GffBinaryError::TextDecoding { context, source })
450}
451
452fn read_binary_blob(parser: &GffParser<'_>, offset: u32) -> Result<Vec<u8>, GffBinaryError> {
453    let offset = to_usize(offset, "field_data_offset")?;
454    let base = parser
455        .header
456        .field_data_offset
457        .checked_add(offset)
458        .ok_or_else(|| GffBinaryError::InvalidData("binary base overflow".into()))?;
459    let len = to_usize(binary::read_u32(parser.bytes, base)?, "binary_length")?;
460    let data_rel_end = offset
461        .checked_add(4)
462        .and_then(|v| v.checked_add(len))
463        .ok_or_else(|| GffBinaryError::InvalidData("binary end overflow".into()))?;
464    if data_rel_end > parser.header.field_data_count {
465        return Err(GffBinaryError::InvalidData(
466            "binary payload exceeds field data bounds".into(),
467        ));
468    }
469    let data_start = base + 4;
470    let raw = parser
471        .bytes
472        .get(data_start..data_start + len)
473        .ok_or_else(|| GffBinaryError::InvalidData("binary bytes out of range".into()))?;
474    Ok(raw.to_vec())
475}
476
477fn read_vector4(parser: &GffParser<'_>, offset: u32) -> Result<[f32; 4], GffBinaryError> {
478    let offset = to_usize(offset, "field_data_offset")?;
479    let base = parser
480        .header
481        .field_data_offset
482        .checked_add(offset)
483        .ok_or_else(|| GffBinaryError::InvalidData("vector4 base overflow".into()))?;
484    let end_rel = offset
485        .checked_add(16)
486        .ok_or_else(|| GffBinaryError::InvalidData("vector4 end overflow".into()))?;
487    if end_rel > parser.header.field_data_count {
488        return Err(GffBinaryError::InvalidData(
489            "vector4 exceeds field data bounds".into(),
490        ));
491    }
492    Ok([
493        binary::read_f32(parser.bytes, base)?,
494        binary::read_f32(parser.bytes, base + 4)?,
495        binary::read_f32(parser.bytes, base + 8)?,
496        binary::read_f32(parser.bytes, base + 12)?,
497    ])
498}
499
500fn read_vector3(parser: &GffParser<'_>, offset: u32) -> Result<[f32; 3], GffBinaryError> {
501    let offset = to_usize(offset, "field_data_offset")?;
502    let base = parser
503        .header
504        .field_data_offset
505        .checked_add(offset)
506        .ok_or_else(|| GffBinaryError::InvalidData("vector3 base overflow".into()))?;
507    let end_rel = offset
508        .checked_add(12)
509        .ok_or_else(|| GffBinaryError::InvalidData("vector3 end overflow".into()))?;
510    if end_rel > parser.header.field_data_count {
511        return Err(GffBinaryError::InvalidData(
512            "vector3 exceeds field data bounds".into(),
513        ));
514    }
515    Ok([
516        binary::read_f32(parser.bytes, base)?,
517        binary::read_f32(parser.bytes, base + 4)?,
518        binary::read_f32(parser.bytes, base + 8)?,
519    ])
520}
521
522fn read_struct_list(
523    parser: &GffParser<'_>,
524    offset: u32,
525    depth: usize,
526) -> Result<Vec<GffStruct>, GffBinaryError> {
527    let offset = to_usize(offset, "list_indices_offset")?;
528    let count_base = parser
529        .header
530        .list_indices_offset
531        .checked_add(offset)
532        .ok_or_else(|| GffBinaryError::InvalidData("list base overflow".into()))?;
533    if offset
534        .checked_add(4)
535        .is_none_or(|end| end > parser.header.list_indices_count)
536    {
537        return Err(GffBinaryError::InvalidData(
538            "list count read out of range".into(),
539        ));
540    }
541    let count = to_usize(binary::read_u32(parser.bytes, count_base)?, "list_count")?;
542    let list_entries_size = count
543        .checked_mul(4)
544        .ok_or_else(|| GffBinaryError::InvalidData("list entries size overflow".into()))?;
545    if offset
546        .checked_add(4)
547        .and_then(|v| v.checked_add(list_entries_size))
548        .is_none_or(|end| end > parser.header.list_indices_count)
549    {
550        return Err(GffBinaryError::InvalidData(
551            "list entries out of range".into(),
552        ));
553    }
554    let mut out = Vec::with_capacity(count);
555    let entries_base = count_base + 4;
556    for index in 0..count {
557        let struct_index = to_usize(
558            binary::read_u32(parser.bytes, entries_base + index * 4)?,
559            "list_struct_index",
560        )?;
561        out.push(parser.read_struct(struct_index, depth + 1)?);
562    }
563    Ok(out)
564}
565
566fn read_localized_string(
567    parser: &GffParser<'_>,
568    offset: u32,
569    field_index: usize,
570) -> Result<GffLocalizedString, GffBinaryError> {
571    let offset = to_usize(offset, "field_data_offset")?;
572    let base = parser
573        .header
574        .field_data_offset
575        .checked_add(offset)
576        .ok_or_else(|| GffBinaryError::InvalidData("locstring base overflow".into()))?;
577    let total_size = to_usize(
578        binary::read_u32(parser.bytes, base)?,
579        "locstring_total_size",
580    )?;
581    let payload_rel_start = offset
582        .checked_add(4)
583        .ok_or_else(|| GffBinaryError::InvalidData("locstring payload start overflow".into()))?;
584    let payload_rel_end = payload_rel_start
585        .checked_add(total_size)
586        .ok_or_else(|| GffBinaryError::InvalidData("locstring payload end overflow".into()))?;
587    if payload_rel_end > parser.header.field_data_count {
588        return Err(GffBinaryError::InvalidData(format!(
589            "locstring payload out of range for field[{field_index}]"
590        )));
591    }
592
593    let payload_start = base + 4;
594    let string_ref = StrRef::from_raw(read_i32(parser.bytes, payload_start)?);
595    let substring_count = to_usize(
596        binary::read_u32(parser.bytes, payload_start + 4)?,
597        "substring_count",
598    )?;
599    let mut cursor = payload_start + 8;
600    let payload_end = payload_start + total_size;
601    let mut substrings = Vec::with_capacity(substring_count);
602
603    for substring_index in 0..substring_count {
604        if cursor.checked_add(8).is_none_or(|end| end > payload_end) {
605            return Err(GffBinaryError::InvalidData(format!(
606                "locstring header truncated at substring {substring_index}"
607            )));
608        }
609        let string_id = binary::read_u32(parser.bytes, cursor)?;
610        let length = to_usize(
611            binary::read_u32(parser.bytes, cursor + 4)?,
612            "substring_length",
613        )?;
614        cursor += 8;
615        if cursor
616            .checked_add(length)
617            .is_none_or(|end| end > payload_end)
618        {
619            return Err(GffBinaryError::InvalidData(format!(
620                "locstring substring bytes out of range at index {substring_index}"
621            )));
622        }
623        let bytes = parser.bytes.get(cursor..cursor + length).ok_or_else(|| {
624            GffBinaryError::InvalidData("locstring substring slice invalid".into())
625        })?;
626        let language_id = string_id / 2;
627        let encoding = text_encoding_for_language(language_id)
628            // Optional enhancement track: language IDs 70..=72 remain
629            // unsupported by default because they are not required for
630            // vanilla K1/K2 parity.
631            .map_err(|err| GffBinaryError::UnsupportedLanguageEncoding(err.language_id.raw()))?;
632        let text =
633            decode_text_strict(bytes, encoding).map_err(|source| GffBinaryError::TextDecoding {
634                context: format!("field[{field_index}] locstring[{substring_index}]"),
635                source,
636            })?;
637        substrings.push(GffLocalizedSubstring { string_id, text });
638        cursor += length;
639    }
640
641    if cursor != payload_end {
642        return Err(GffBinaryError::InvalidData(format!(
643            "locstring payload has {} trailing bytes",
644            payload_end - cursor
645        )));
646    }
647
648    Ok(GffLocalizedString {
649        string_ref,
650        substrings,
651    })
652}
653
654fn check_table_bounds(
655    total_len: usize,
656    offset: usize,
657    size: usize,
658    table_name: &str,
659) -> Result<(), GffBinaryError> {
660    binary::check_range_in_bounds(total_len, offset, size, table_name)?;
661    Ok(())
662}
663
664fn read_i32(bytes: &[u8], offset: usize) -> Result<i32, GffBinaryError> {
665    let bits = binary::read_u32(bytes, offset)?;
666    Ok(i32::from_le_bytes(bits.to_le_bytes()))
667}
668
669#[cfg(test)]
670mod tests {
671    use super::*;
672    use crate::gff::write_gff_to_vec;
673
674    const TEST_GFF: &[u8] = include_bytes!(concat!(
675        env!("CARGO_MANIFEST_DIR"),
676        "/../../fixtures/test.gff"
677    ));
678    const TEST_UTC: &[u8] = include_bytes!(concat!(
679        env!("CARGO_MANIFEST_DIR"),
680        "/../../fixtures/test.utc"
681    ));
682
683    #[test]
684    fn roundtrip_gff_binary_with_all_core_field_variants() {
685        let mut root = GffStruct::new(-1);
686        root.push_field("uint8", GffValue::UInt8(255));
687        root.push_field("int8", GffValue::Int8(-127));
688        root.push_field("uint16", GffValue::UInt16(65535));
689        root.push_field("int16", GffValue::Int16(-32768));
690        root.push_field("uint32", GffValue::UInt32(u32::MAX));
691        root.push_field("int32", GffValue::Int32(i32::MIN));
692        root.push_field("uint64", GffValue::UInt64(4_294_967_296));
693        root.push_field("int64", GffValue::Int64(2_147_483_647));
694        root.push_field("single", GffValue::Single(12.34567));
695        root.push_field("double", GffValue::Double(12.345678901234));
696        root.push_field("string", GffValue::String("abcdefghij123456789".into()));
697        root.push_field("resref", GffValue::resref_lit("resref01"));
698        root.push_field(
699            "locstring",
700            GffValue::LocalizedString(GffLocalizedString {
701                string_ref: StrRef::invalid(),
702                substrings: vec![
703                    GffLocalizedSubstring {
704                        string_id: 0,
705                        text: "male_eng".into(),
706                    },
707                    GffLocalizedSubstring {
708                        string_id: 5,
709                        text: "fem_german".into(),
710                    },
711                ],
712            }),
713        );
714        root.push_field("binary", GffValue::Binary(b"binarydata".to_vec()));
715        root.push_field("orientation", GffValue::Vector4([1.0, 2.0, 3.0, 4.0]));
716        root.push_field("position", GffValue::Vector3([11.0, 22.0, 33.0]));
717
718        let mut child = GffStruct::new(0);
719        child.push_field("child_uint8", GffValue::UInt8(4));
720        root.push_field("child_struct", GffValue::Struct(Box::new(child)));
721        root.push_field(
722            "list",
723            GffValue::List(vec![GffStruct::new(1), GffStruct::new(2)]),
724        );
725
726        let original = Gff::generic(root);
727        let bytes = write_gff_to_vec(&original).expect("write should succeed");
728        let parsed = read_gff_from_bytes(&bytes).expect("read should succeed");
729        assert_eq!(parsed, original);
730    }
731
732    #[test]
733    fn parses_gff_fixture() {
734        let gff = read_gff_from_bytes(TEST_GFF).expect("fixture should parse");
735        assert_eq!(gff.file_type, *b"GFF ");
736
737        assert_eq!(find_field(&gff.root, "uint8"), &GffValue::UInt8(255));
738        assert_eq!(find_field(&gff.root, "int8"), &GffValue::Int8(-127));
739        assert_eq!(find_field(&gff.root, "uint16"), &GffValue::UInt16(65535));
740        assert_eq!(find_field(&gff.root, "int16"), &GffValue::Int16(-32768));
741        assert_eq!(find_field(&gff.root, "uint32"), &GffValue::UInt32(u32::MAX));
742        assert_eq!(find_field(&gff.root, "int32"), &GffValue::Int32(i32::MIN));
743        assert_eq!(
744            find_field(&gff.root, "uint64"),
745            &GffValue::UInt64(4_294_967_296)
746        );
747        assert_eq!(
748            find_field(&gff.root, "string"),
749            &GffValue::String("abcdefghij123456789".into())
750        );
751        assert_eq!(
752            find_field(&gff.root, "resref"),
753            &GffValue::resref_lit("resref01")
754        );
755        match find_field(&gff.root, "locstring") {
756            GffValue::LocalizedString(loc) => {
757                assert_eq!(loc.string_ref, StrRef::invalid());
758                assert_eq!(loc.substrings.len(), 2);
759                assert_eq!(loc.substrings[0].text, "male_eng");
760                assert_eq!(loc.substrings[1].text, "fem_german");
761            }
762            other => panic!("expected localized string, got {other:?}"),
763        }
764    }
765
766    #[test]
767    fn read_write_roundtrip_preserves_fixture_semantics() {
768        let parsed = read_gff_from_bytes(TEST_GFF).expect("read should succeed");
769        let bytes = write_gff_to_vec(&parsed).expect("write should succeed");
770        let reparsed = read_gff_from_bytes(&bytes).expect("re-read should succeed");
771        assert_eq!(reparsed, parsed);
772    }
773
774    #[test]
775    fn writer_is_deterministic_for_parsed_fixture() {
776        let parsed = read_gff_from_bytes(TEST_GFF).expect("fixture should parse");
777        let first = write_gff_to_vec(&parsed).expect("first write should succeed");
778        let second = write_gff_to_vec(&parsed).expect("second write should succeed");
779        assert_eq!(first, second, "canonical GFF writer output drifted");
780    }
781
782    #[test]
783    fn roundtrip_preserves_list_order_and_struct_ids() {
784        let mut first = GffStruct::new(500);
785        first.push_field("marker", GffValue::UInt16(11));
786
787        let mut second = GffStruct::new(2);
788        second.push_field("marker", GffValue::UInt16(22));
789
790        let mut third = GffStruct::new(9_999);
791        third.push_field("marker", GffValue::UInt16(33));
792
793        let mut root = GffStruct::new(-1);
794        root.push_field("ordered", GffValue::List(vec![first, second, third]));
795
796        let gff = Gff::generic(root);
797        let bytes = write_gff_to_vec(&gff).expect("write should succeed");
798        let reparsed = read_gff_from_bytes(&bytes).expect("read should succeed");
799
800        let list = find_list(&reparsed.root, "ordered");
801        assert_eq!(list_struct_ids(list), vec![500, 2, 9_999]);
802        assert_eq!(list_u16_field(list, "marker"), vec![11, 22, 33]);
803    }
804
805    #[test]
806    fn utc_fixture_roundtrip_preserves_list_indices_and_values() {
807        let parsed = read_gff_from_bytes(TEST_UTC).expect("fixture should parse");
808        let bytes = write_gff_to_vec(&parsed).expect("write should succeed");
809        let reparsed = read_gff_from_bytes(&bytes).expect("re-read should succeed");
810
811        for label in ["FeatList", "Equip_ItemList", "ItemList", "ClassList"] {
812            assert_eq!(
813                list_struct_ids(find_list(&parsed.root, label)),
814                list_struct_ids(find_list(&reparsed.root, label)),
815                "list struct_id order changed for {label}"
816            );
817        }
818
819        assert_eq!(
820            list_u16_field(find_list(&parsed.root, "FeatList"), "Feat"),
821            list_u16_field(find_list(&reparsed.root, "FeatList"), "Feat")
822        );
823        assert_eq!(
824            list_resref_field(find_list(&parsed.root, "Equip_ItemList"), "EquippedRes"),
825            list_resref_field(find_list(&reparsed.root, "Equip_ItemList"), "EquippedRes")
826        );
827        assert_eq!(
828            list_resref_field(find_list(&parsed.root, "ItemList"), "InventoryRes"),
829            list_resref_field(find_list(&reparsed.root, "ItemList"), "InventoryRes")
830        );
831    }
832
833    #[test]
834    fn rejects_invalid_version() {
835        let mut bytes = vec![0_u8; GFF_HEADER_SIZE];
836        bytes[0..4].copy_from_slice(b"GFF ");
837        bytes[4..8].copy_from_slice(b"V9.9");
838        let err = read_gff_from_bytes(&bytes).expect_err("must fail");
839        assert!(matches!(err, GffBinaryError::InvalidVersion(_)));
840    }
841
842    #[test]
843    fn rejects_truncated_header() {
844        let bytes = vec![0_u8; GFF_HEADER_SIZE - 1];
845        let err = read_gff_from_bytes(&bytes).expect_err("must fail");
846        assert!(matches!(err, GffBinaryError::InvalidHeader(_)));
847    }
848
849    #[test]
850    fn rejects_unknown_field_type() {
851        let mut bytes = TEST_GFF.to_vec();
852        let field_offset = usize::try_from(u32::from_le_bytes(
853            bytes[16..20].try_into().expect("field offset bytes"),
854        ))
855        .expect("offset fits in usize");
856        bytes[field_offset..field_offset + 4].copy_from_slice(&99_u32.to_le_bytes());
857
858        let err = read_gff_from_bytes(&bytes).expect_err("must fail");
859        assert!(matches!(err, GffBinaryError::InvalidFieldType(99)));
860    }
861
862    #[test]
863    fn writer_rejects_unsupported_locstring_language_ids() {
864        let mut root = GffStruct::new(-1);
865        root.push_field(
866            "locstring",
867            GffValue::LocalizedString(GffLocalizedString {
868                string_ref: StrRef::invalid(),
869                substrings: vec![GffLocalizedSubstring {
870                    string_id: 140,
871                    text: "test".into(),
872                }],
873            }),
874        );
875        let gff = Gff::generic(root);
876
877        let err = write_gff_to_vec(&gff).expect_err("must fail");
878        assert!(matches!(
879            err,
880            GffBinaryError::UnsupportedLanguageEncoding(70)
881        ));
882    }
883
884    fn find_field<'a>(structure: &'a GffStruct, label: &str) -> &'a GffValue {
885        structure
886            .field(label)
887            .unwrap_or_else(|| panic!("missing field {label}"))
888    }
889
890    fn find_list<'a>(structure: &'a GffStruct, label: &str) -> &'a [GffStruct] {
891        match structure.field(label) {
892            Some(GffValue::List(values)) => values.as_slice(),
893            Some(other) => panic!("field {label} is not a list: {other:?}"),
894            None => panic!("missing list field {label}"),
895        }
896    }
897
898    fn list_struct_ids(list: &[GffStruct]) -> Vec<i32> {
899        list.iter().map(|entry| entry.struct_id).collect::<Vec<_>>()
900    }
901
902    fn list_u16_field(list: &[GffStruct], label: &str) -> Vec<u16> {
903        list.iter()
904            .map(|entry| match entry.field(label) {
905                Some(GffValue::UInt16(value)) => *value,
906                Some(other) => panic!("field {label} is not UInt16: {other:?}"),
907                None => panic!("missing field {label}"),
908            })
909            .collect::<Vec<_>>()
910    }
911
912    fn list_resref_field(list: &[GffStruct], label: &str) -> Vec<String> {
913        list.iter()
914            .map(|entry| match entry.field(label) {
915                Some(GffValue::ResRef(value)) => value.to_string(),
916                Some(other) => panic!("field {label} is not ResRef: {other:?}"),
917                None => panic!("missing field {label}"),
918            })
919            .collect::<Vec<_>>()
920    }
921}