Skip to main content

rakata_formats/gff/
reader.rs

1//! GFF V3.2 binary reader.
2
3use std::io::Read;
4
5use rakata_core::{decode_text_strict, text_encoding_for_language, ResRef, StrRef};
6
7use super::{
8    binary, to_usize, FieldType, Gff, GffBinaryError, GffField, GffLocalizedString,
9    GffLocalizedSubstring, GffStruct, GffValue, DEFAULT_TEXT_ENCODING, FIELD_ENTRY_SIZE,
10    GFF_HEADER_SIZE, GFF_VERSION_V32, LABEL_SIZE, STRUCT_ENTRY_SIZE,
11};
12
13/// Reads a binary GFF file from a reader.
14///
15/// The stream is consumed from its current position.
16#[cfg_attr(
17    feature = "tracing",
18    tracing::instrument(level = "debug", skip(reader))
19)]
20pub fn read_gff<R: Read>(reader: &mut R) -> Result<Gff, GffBinaryError> {
21    let mut bytes = Vec::new();
22    reader.read_to_end(&mut bytes)?;
23    crate::trace_debug!(bytes_len = bytes.len(), "read gff bytes from reader");
24    read_gff_from_bytes(&bytes)
25}
26
27/// Reads a binary GFF file from bytes.
28#[cfg_attr(
29    feature = "tracing",
30    tracing::instrument(level = "debug", skip(bytes), fields(bytes_len = bytes.len()))
31)]
32pub fn read_gff_from_bytes(bytes: &[u8]) -> Result<Gff, GffBinaryError> {
33    let header = parse_header(bytes)?;
34    let labels = read_labels(bytes, &header)?;
35    let parser = GffParser {
36        bytes,
37        header,
38        labels,
39    };
40    let root = parser.read_struct(0, 0)?;
41    crate::trace_debug!(
42        file_type = ?parser.header.file_type,
43        struct_count = parser.header.struct_count,
44        field_count = parser.header.field_count,
45        label_count = parser.header.label_count,
46        root_field_count = root.fields.len(),
47        "parsed gff from bytes"
48    );
49    Ok(Gff {
50        file_type: parser.header.file_type,
51        root,
52    })
53}
54
/// Decoded GFF file header.
///
/// Every offset is used as an absolute index into the file's byte slice.
/// `parse_header` bounds-checks each table against the file length before
/// constructing this value.
#[derive(Debug, Clone, Copy)]
struct GffHeader {
    /// Four-character file type tag read from byte 0 of the file.
    file_type: [u8; 4],
    /// Start of the struct table.
    struct_offset: usize,
    /// Number of entries in the struct table (each `STRUCT_ENTRY_SIZE` bytes).
    struct_count: usize,
    /// Start of the field table.
    field_offset: usize,
    /// Number of entries in the field table (each `FIELD_ENTRY_SIZE` bytes).
    field_count: usize,
    /// Start of the label table.
    label_offset: usize,
    /// Number of entries in the label table (each `LABEL_SIZE` bytes).
    label_count: usize,
    /// Start of the field-data block.
    field_data_offset: usize,
    /// Size of the field-data block; treated as a byte length by the bounds checks.
    field_data_count: usize,
    /// Start of the field-indices block.
    field_indices_offset: usize,
    /// Size of the field-indices block; treated as a byte length by the bounds checks.
    field_indices_count: usize,
    /// Start of the list-indices block.
    list_indices_offset: usize,
    /// Size of the list-indices block; treated as a byte length by the bounds checks.
    list_indices_count: usize,
}
71
/// Read-only parsing context shared by the struct/field readers.
struct GffParser<'a> {
    /// Complete file contents; all table offsets index into this slice.
    bytes: &'a [u8],
    /// Header describing where each table lives inside `bytes`.
    header: GffHeader,
    /// Label table decoded up front; fields refer to entries by index.
    labels: Vec<super::GffLabel>,
}
77
78impl<'a> GffParser<'a> {
79    fn read_struct(&self, struct_index: usize, depth: usize) -> Result<GffStruct, GffBinaryError> {
80        // Depth alternates struct -> field -> nested struct, growing by
81        // one per hop. The longest legitimate acyclic path visits each
82        // struct once and reads its fields, so max depth is
83        // 2 * struct_count - 1. Use 2 * struct_count as the trip
84        // bound (saturating for paranoia against malformed headers).
85        let depth_limit = self.header.struct_count.saturating_mul(2);
86        if depth > depth_limit {
87            return Err(GffBinaryError::InvalidData(
88                "detected nested struct cycle while reading".into(),
89            ));
90        }
91        if struct_index >= self.header.struct_count {
92            return Err(GffBinaryError::InvalidData(format!(
93                "struct index {struct_index} out of range"
94            )));
95        }
96
97        let base = self
98            .header
99            .struct_offset
100            .checked_add(
101                struct_index
102                    .checked_mul(STRUCT_ENTRY_SIZE)
103                    .ok_or_else(|| GffBinaryError::InvalidHeader("struct index overflow".into()))?,
104            )
105            .ok_or_else(|| GffBinaryError::InvalidHeader("struct base overflow".into()))?;
106        let struct_id = read_i32(self.bytes, base)?;
107        let data_or_offset = binary::read_u32(self.bytes, base + 4)?;
108        let field_count = usize::try_from(binary::read_u32(self.bytes, base + 8)?)
109            .map_err(|_| GffBinaryError::InvalidData("field count does not fit usize".into()))?;
110
111        let mut fields = Vec::with_capacity(field_count);
112        match field_count {
113            0 => {}
114            1 => {
115                let field_index = usize::try_from(data_or_offset).map_err(|_| {
116                    GffBinaryError::InvalidData("single field index does not fit usize".into())
117                })?;
118                fields.push(self.read_field(field_index, depth + 1)?);
119            }
120            _ => {
121                let indices_rel_offset = usize::try_from(data_or_offset).map_err(|_| {
122                    GffBinaryError::InvalidData("field indices offset does not fit usize".into())
123                })?;
124                let indices_size = field_count.checked_mul(4).ok_or_else(|| {
125                    GffBinaryError::InvalidData("field indices size overflow".into())
126                })?;
127                if indices_rel_offset
128                    .checked_add(indices_size)
129                    .is_none_or(|end| end > self.header.field_indices_count)
130                {
131                    return Err(GffBinaryError::InvalidData(format!(
132                        "field indices block out of range for struct {struct_index}"
133                    )));
134                }
135                let indices_base = self
136                    .header
137                    .field_indices_offset
138                    .checked_add(indices_rel_offset)
139                    .ok_or_else(|| {
140                        GffBinaryError::InvalidData("field indices absolute offset overflow".into())
141                    })?;
142                for i in 0..field_count {
143                    let field_index =
144                        usize::try_from(binary::read_u32(self.bytes, indices_base + i * 4)?)
145                            .map_err(|_| {
146                                GffBinaryError::InvalidData(
147                                    "field index from field-indices table does not fit usize"
148                                        .into(),
149                                )
150                            })?;
151                    fields.push(self.read_field(field_index, depth + 1)?);
152                }
153            }
154        }
155
156        Ok(GffStruct { struct_id, fields })
157    }
158
159    fn read_field(&self, field_index: usize, depth: usize) -> Result<GffField, GffBinaryError> {
160        if field_index >= self.header.field_count {
161            return Err(GffBinaryError::InvalidData(format!(
162                "field index {field_index} out of range"
163            )));
164        }
165        let base = self
166            .header
167            .field_offset
168            .checked_add(
169                field_index
170                    .checked_mul(FIELD_ENTRY_SIZE)
171                    .ok_or_else(|| GffBinaryError::InvalidHeader("field index overflow".into()))?,
172            )
173            .ok_or_else(|| GffBinaryError::InvalidHeader("field base overflow".into()))?;
174
175        let field_type_raw = binary::read_u32(self.bytes, base)?;
176        let field_type = FieldType::try_from(field_type_raw)
177            .map_err(|_| GffBinaryError::InvalidFieldType(field_type_raw))?;
178        let label_index = usize::try_from(binary::read_u32(self.bytes, base + 4)?)
179            .map_err(|_| GffBinaryError::InvalidData("label index does not fit usize".into()))?;
180        let data_or_offset = binary::read_u32(self.bytes, base + 8)?;
181        let label = *self.labels.get(label_index).ok_or_else(|| {
182            GffBinaryError::InvalidData(format!("label index {label_index} out of range"))
183        })?;
184
185        let value = match field_type {
186            FieldType::UInt8 => {
187                GffValue::UInt8(u8::try_from(data_or_offset & 0xFF).expect("masked to 8 bits"))
188            }
189            FieldType::Int8 => {
190                GffValue::Int8(i8::from_le_bytes([
191                    u8::try_from(data_or_offset & 0xFF).expect("masked to 8 bits")
192                ]))
193            }
194            FieldType::UInt16 => {
195                GffValue::UInt16(u16::try_from(data_or_offset & 0xFFFF).expect("masked to 16 bits"))
196            }
197            FieldType::Int16 => GffValue::Int16(i16::from_le_bytes(
198                u16::try_from(data_or_offset & 0xFFFF)
199                    .expect("masked to 16 bits")
200                    .to_le_bytes(),
201            )),
202            FieldType::UInt32 => GffValue::UInt32(data_or_offset),
203            FieldType::Int32 => GffValue::Int32(i32::from_le_bytes(data_or_offset.to_le_bytes())),
204            FieldType::Single => GffValue::Single(f32::from_bits(data_or_offset)),
205            FieldType::UInt64 => GffValue::UInt64(read_u64_at_field_data(self, data_or_offset)?),
206            FieldType::Int64 => GffValue::Int64(i64::from_le_bytes(
207                read_u64_at_field_data(self, data_or_offset)?.to_le_bytes(),
208            )),
209            FieldType::Double => GffValue::Double(f64::from_bits(read_u64_at_field_data(
210                self,
211                data_or_offset,
212            )?)),
213            FieldType::String => GffValue::String(read_sized_string(
214                self,
215                data_or_offset,
216                format!("field[{field_index}] string"),
217            )?),
218            FieldType::ResRef => {
219                let raw = read_sized_string_u8(
220                    self,
221                    data_or_offset,
222                    format!("field[{field_index}] resref"),
223                )?;
224                let resref = ResRef::new(&raw).map_err(|err| {
225                    GffBinaryError::InvalidData(format!(
226                        "field[{field_index}] resref `{raw}`: {err}"
227                    ))
228                })?;
229                GffValue::ResRef(resref)
230            }
231            FieldType::LocalizedString => {
232                GffValue::LocalizedString(read_localized_string(self, data_or_offset, field_index)?)
233            }
234            FieldType::Binary => GffValue::Binary(read_binary_blob(self, data_or_offset)?),
235            FieldType::Struct => {
236                let struct_index = usize::try_from(data_or_offset).map_err(|_| {
237                    GffBinaryError::InvalidData("nested struct index does not fit usize".into())
238                })?;
239                GffValue::Struct(Box::new(self.read_struct(struct_index, depth + 1)?))
240            }
241            FieldType::List => GffValue::List(read_struct_list(self, data_or_offset, depth + 1)?),
242            FieldType::Vector4 => GffValue::Vector4(read_vector4(self, data_or_offset)?),
243            FieldType::Vector3 => GffValue::Vector3(read_vector3(self, data_or_offset)?),
244            FieldType::StrRef => GffValue::StrRef(StrRef::from_raw(i32::from_le_bytes(
245                data_or_offset.to_le_bytes(),
246            ))),
247        };
248
249        Ok(GffField { label, value })
250    }
251}
252
253fn parse_header(bytes: &[u8]) -> Result<GffHeader, GffBinaryError> {
254    if bytes.len() < GFF_HEADER_SIZE {
255        return Err(GffBinaryError::InvalidHeader(
256            "file smaller than GFF header".into(),
257        ));
258    }
259    let file_type = binary::read_fourcc(bytes, 0)?;
260    let version = binary::read_fourcc(bytes, 4)?;
261    binary::expect_fourcc(version, GFF_VERSION_V32).map_err(GffBinaryError::InvalidVersion)?;
262
263    let struct_offset = to_usize(binary::read_u32(bytes, 8)?, "struct_offset")?;
264    let struct_count = to_usize(binary::read_u32(bytes, 12)?, "struct_count")?;
265    let field_offset = to_usize(binary::read_u32(bytes, 16)?, "field_offset")?;
266    let field_count = to_usize(binary::read_u32(bytes, 20)?, "field_count")?;
267    let label_offset = to_usize(binary::read_u32(bytes, 24)?, "label_offset")?;
268    let label_count = to_usize(binary::read_u32(bytes, 28)?, "label_count")?;
269    let field_data_offset = to_usize(binary::read_u32(bytes, 32)?, "field_data_offset")?;
270    let field_data_count = to_usize(binary::read_u32(bytes, 36)?, "field_data_count")?;
271    let field_indices_offset = to_usize(binary::read_u32(bytes, 40)?, "field_indices_offset")?;
272    let field_indices_count = to_usize(binary::read_u32(bytes, 44)?, "field_indices_count")?;
273    let list_indices_offset = to_usize(binary::read_u32(bytes, 48)?, "list_indices_offset")?;
274    let list_indices_count = to_usize(binary::read_u32(bytes, 52)?, "list_indices_count")?;
275
276    check_table_bounds(
277        bytes.len(),
278        struct_offset,
279        struct_count
280            .checked_mul(STRUCT_ENTRY_SIZE)
281            .ok_or(GffBinaryError::InvalidHeader(
282                "struct table size overflow".into(),
283            ))?,
284        "struct table",
285    )?;
286    check_table_bounds(
287        bytes.len(),
288        field_offset,
289        field_count
290            .checked_mul(FIELD_ENTRY_SIZE)
291            .ok_or(GffBinaryError::InvalidHeader(
292                "field table size overflow".into(),
293            ))?,
294        "field table",
295    )?;
296    check_table_bounds(
297        bytes.len(),
298        label_offset,
299        label_count
300            .checked_mul(LABEL_SIZE)
301            .ok_or(GffBinaryError::InvalidHeader(
302                "label table size overflow".into(),
303            ))?,
304        "label table",
305    )?;
306    check_table_bounds(
307        bytes.len(),
308        field_data_offset,
309        field_data_count,
310        "field data",
311    )?;
312    check_table_bounds(
313        bytes.len(),
314        field_indices_offset,
315        field_indices_count,
316        "field indices",
317    )?;
318    check_table_bounds(
319        bytes.len(),
320        list_indices_offset,
321        list_indices_count,
322        "list indices",
323    )?;
324
325    if struct_count == 0 {
326        return Err(GffBinaryError::InvalidHeader(
327            "struct table is empty (missing root struct)".into(),
328        ));
329    }
330
331    Ok(GffHeader {
332        file_type,
333        struct_offset,
334        struct_count,
335        field_offset,
336        field_count,
337        label_offset,
338        label_count,
339        field_data_offset,
340        field_data_count,
341        field_indices_offset,
342        field_indices_count,
343        list_indices_offset,
344        list_indices_count,
345    })
346}
347
348fn read_labels(bytes: &[u8], header: &GffHeader) -> Result<Vec<super::GffLabel>, GffBinaryError> {
349    let mut labels = Vec::with_capacity(header.label_count);
350    for label_index in 0..header.label_count {
351        let offset = header
352            .label_offset
353            .checked_add(
354                label_index
355                    .checked_mul(LABEL_SIZE)
356                    .ok_or_else(|| GffBinaryError::InvalidHeader("label offset overflow".into()))?,
357            )
358            .ok_or_else(|| GffBinaryError::InvalidHeader("label base overflow".into()))?;
359        let raw = bytes
360            .get(offset..offset + LABEL_SIZE)
361            .ok_or_else(|| GffBinaryError::InvalidHeader("label slice out of range".into()))?;
362        let end = raw.iter().position(|byte| *byte == 0).unwrap_or(LABEL_SIZE);
363        let label = decode_text_strict(&raw[..end], DEFAULT_TEXT_ENCODING).map_err(|source| {
364            GffBinaryError::TextDecoding {
365                context: format!("label[{label_index}]"),
366                source,
367            }
368        })?;
369        let gff_label = super::GffLabel::new(&label).map_err(|err| {
370            GffBinaryError::InvalidData(format!("label[{label_index}] `{label}` is invalid: {err}"))
371        })?;
372        labels.push(gff_label);
373    }
374    Ok(labels)
375}
376
377fn read_u64_at_field_data(parser: &GffParser<'_>, offset: u32) -> Result<u64, GffBinaryError> {
378    let offset = to_usize(offset, "field_data_offset")?;
379    let absolute = parser
380        .header
381        .field_data_offset
382        .checked_add(offset)
383        .ok_or_else(|| GffBinaryError::InvalidData("field data absolute offset overflow".into()))?;
384    let end_rel = offset
385        .checked_add(8)
386        .ok_or_else(|| GffBinaryError::InvalidData("field data u64 end overflow".into()))?;
387    if end_rel > parser.header.field_data_count {
388        return Err(GffBinaryError::InvalidData(
389            "field data u64 read out of range".into(),
390        ));
391    }
392    Ok(binary::read_u64(parser.bytes, absolute)?)
393}
394
395fn read_sized_string(
396    parser: &GffParser<'_>,
397    offset: u32,
398    context: String,
399) -> Result<String, GffBinaryError> {
400    let offset = to_usize(offset, "field_data_offset")?;
401    let base = parser
402        .header
403        .field_data_offset
404        .checked_add(offset)
405        .ok_or_else(|| GffBinaryError::InvalidData("string base overflow".into()))?;
406    let len = to_usize(binary::read_u32(parser.bytes, base)?, "string_length")?;
407    let data_rel_end = offset
408        .checked_add(4)
409        .and_then(|v| v.checked_add(len))
410        .ok_or_else(|| GffBinaryError::InvalidData("string end overflow".into()))?;
411    if data_rel_end > parser.header.field_data_count {
412        return Err(GffBinaryError::InvalidData(
413            "string payload exceeds field data bounds".into(),
414        ));
415    }
416    let data_start = base + 4;
417    let raw = parser
418        .bytes
419        .get(data_start..data_start + len)
420        .ok_or_else(|| GffBinaryError::InvalidData("string bytes out of range".into()))?;
421    decode_text_strict(raw, DEFAULT_TEXT_ENCODING)
422        .map_err(|source| GffBinaryError::TextDecoding { context, source })
423}
424
425fn read_sized_string_u8(
426    parser: &GffParser<'_>,
427    offset: u32,
428    context: String,
429) -> Result<String, GffBinaryError> {
430    let offset = to_usize(offset, "field_data_offset")?;
431    let base = parser
432        .header
433        .field_data_offset
434        .checked_add(offset)
435        .ok_or_else(|| GffBinaryError::InvalidData("resref base overflow".into()))?;
436    let len =
437        usize::from(*parser.bytes.get(base).ok_or_else(|| {
438            GffBinaryError::InvalidData("resref length byte out of range".into())
439        })?);
440    let data_rel_end = offset
441        .checked_add(1)
442        .and_then(|v| v.checked_add(len))
443        .ok_or_else(|| GffBinaryError::InvalidData("resref end overflow".into()))?;
444    if data_rel_end > parser.header.field_data_count {
445        return Err(GffBinaryError::InvalidData(
446            "resref payload exceeds field data bounds".into(),
447        ));
448    }
449    let data_start = base + 1;
450    let raw = parser
451        .bytes
452        .get(data_start..data_start + len)
453        .ok_or_else(|| GffBinaryError::InvalidData("resref bytes out of range".into()))?;
454    decode_text_strict(raw, DEFAULT_TEXT_ENCODING)
455        .map_err(|source| GffBinaryError::TextDecoding { context, source })
456}
457
458fn read_binary_blob(parser: &GffParser<'_>, offset: u32) -> Result<Vec<u8>, GffBinaryError> {
459    let offset = to_usize(offset, "field_data_offset")?;
460    let base = parser
461        .header
462        .field_data_offset
463        .checked_add(offset)
464        .ok_or_else(|| GffBinaryError::InvalidData("binary base overflow".into()))?;
465    let len = to_usize(binary::read_u32(parser.bytes, base)?, "binary_length")?;
466    let data_rel_end = offset
467        .checked_add(4)
468        .and_then(|v| v.checked_add(len))
469        .ok_or_else(|| GffBinaryError::InvalidData("binary end overflow".into()))?;
470    if data_rel_end > parser.header.field_data_count {
471        return Err(GffBinaryError::InvalidData(
472            "binary payload exceeds field data bounds".into(),
473        ));
474    }
475    let data_start = base + 4;
476    let raw = parser
477        .bytes
478        .get(data_start..data_start + len)
479        .ok_or_else(|| GffBinaryError::InvalidData("binary bytes out of range".into()))?;
480    Ok(raw.to_vec())
481}
482
483fn read_vector4(parser: &GffParser<'_>, offset: u32) -> Result<[f32; 4], GffBinaryError> {
484    let offset = to_usize(offset, "field_data_offset")?;
485    let base = parser
486        .header
487        .field_data_offset
488        .checked_add(offset)
489        .ok_or_else(|| GffBinaryError::InvalidData("vector4 base overflow".into()))?;
490    let end_rel = offset
491        .checked_add(16)
492        .ok_or_else(|| GffBinaryError::InvalidData("vector4 end overflow".into()))?;
493    if end_rel > parser.header.field_data_count {
494        return Err(GffBinaryError::InvalidData(
495            "vector4 exceeds field data bounds".into(),
496        ));
497    }
498    Ok([
499        binary::read_f32(parser.bytes, base)?,
500        binary::read_f32(parser.bytes, base + 4)?,
501        binary::read_f32(parser.bytes, base + 8)?,
502        binary::read_f32(parser.bytes, base + 12)?,
503    ])
504}
505
506fn read_vector3(parser: &GffParser<'_>, offset: u32) -> Result<[f32; 3], GffBinaryError> {
507    let offset = to_usize(offset, "field_data_offset")?;
508    let base = parser
509        .header
510        .field_data_offset
511        .checked_add(offset)
512        .ok_or_else(|| GffBinaryError::InvalidData("vector3 base overflow".into()))?;
513    let end_rel = offset
514        .checked_add(12)
515        .ok_or_else(|| GffBinaryError::InvalidData("vector3 end overflow".into()))?;
516    if end_rel > parser.header.field_data_count {
517        return Err(GffBinaryError::InvalidData(
518            "vector3 exceeds field data bounds".into(),
519        ));
520    }
521    Ok([
522        binary::read_f32(parser.bytes, base)?,
523        binary::read_f32(parser.bytes, base + 4)?,
524        binary::read_f32(parser.bytes, base + 8)?,
525    ])
526}
527
528fn read_struct_list(
529    parser: &GffParser<'_>,
530    offset: u32,
531    depth: usize,
532) -> Result<Vec<GffStruct>, GffBinaryError> {
533    let offset = to_usize(offset, "list_indices_offset")?;
534    let count_base = parser
535        .header
536        .list_indices_offset
537        .checked_add(offset)
538        .ok_or_else(|| GffBinaryError::InvalidData("list base overflow".into()))?;
539    if offset
540        .checked_add(4)
541        .is_none_or(|end| end > parser.header.list_indices_count)
542    {
543        return Err(GffBinaryError::InvalidData(
544            "list count read out of range".into(),
545        ));
546    }
547    let count = to_usize(binary::read_u32(parser.bytes, count_base)?, "list_count")?;
548    let list_entries_size = count
549        .checked_mul(4)
550        .ok_or_else(|| GffBinaryError::InvalidData("list entries size overflow".into()))?;
551    if offset
552        .checked_add(4)
553        .and_then(|v| v.checked_add(list_entries_size))
554        .is_none_or(|end| end > parser.header.list_indices_count)
555    {
556        return Err(GffBinaryError::InvalidData(
557            "list entries out of range".into(),
558        ));
559    }
560    let mut out = Vec::with_capacity(count);
561    let entries_base = count_base + 4;
562    for index in 0..count {
563        let struct_index = to_usize(
564            binary::read_u32(parser.bytes, entries_base + index * 4)?,
565            "list_struct_index",
566        )?;
567        out.push(parser.read_struct(struct_index, depth + 1)?);
568    }
569    Ok(out)
570}
571
572fn read_localized_string(
573    parser: &GffParser<'_>,
574    offset: u32,
575    field_index: usize,
576) -> Result<GffLocalizedString, GffBinaryError> {
577    let offset = to_usize(offset, "field_data_offset")?;
578    let base = parser
579        .header
580        .field_data_offset
581        .checked_add(offset)
582        .ok_or_else(|| GffBinaryError::InvalidData("locstring base overflow".into()))?;
583    let total_size = to_usize(
584        binary::read_u32(parser.bytes, base)?,
585        "locstring_total_size",
586    )?;
587    let payload_rel_start = offset
588        .checked_add(4)
589        .ok_or_else(|| GffBinaryError::InvalidData("locstring payload start overflow".into()))?;
590    let payload_rel_end = payload_rel_start
591        .checked_add(total_size)
592        .ok_or_else(|| GffBinaryError::InvalidData("locstring payload end overflow".into()))?;
593    if payload_rel_end > parser.header.field_data_count {
594        return Err(GffBinaryError::InvalidData(format!(
595            "locstring payload out of range for field[{field_index}]"
596        )));
597    }
598
599    let payload_start = base + 4;
600    let string_ref = StrRef::from_raw(read_i32(parser.bytes, payload_start)?);
601    let substring_count = to_usize(
602        binary::read_u32(parser.bytes, payload_start + 4)?,
603        "substring_count",
604    )?;
605    let mut cursor = payload_start + 8;
606    let payload_end = payload_start + total_size;
607    let mut substrings = Vec::with_capacity(substring_count);
608
609    for substring_index in 0..substring_count {
610        if cursor.checked_add(8).is_none_or(|end| end > payload_end) {
611            return Err(GffBinaryError::InvalidData(format!(
612                "locstring header truncated at substring {substring_index}"
613            )));
614        }
615        let string_id = binary::read_u32(parser.bytes, cursor)?;
616        let length = to_usize(
617            binary::read_u32(parser.bytes, cursor + 4)?,
618            "substring_length",
619        )?;
620        cursor += 8;
621        if cursor
622            .checked_add(length)
623            .is_none_or(|end| end > payload_end)
624        {
625            return Err(GffBinaryError::InvalidData(format!(
626                "locstring substring bytes out of range at index {substring_index}"
627            )));
628        }
629        let bytes = parser.bytes.get(cursor..cursor + length).ok_or_else(|| {
630            GffBinaryError::InvalidData("locstring substring slice invalid".into())
631        })?;
632        let language_id = string_id / 2;
633        let encoding = text_encoding_for_language(language_id)
634            // Optional enhancement track: language IDs 70..=72 remain
635            // unsupported by default because they are not required for
636            // vanilla K1/K2 parity.
637            .map_err(|err| GffBinaryError::UnsupportedLanguageEncoding(err.language_id.raw()))?;
638        let text =
639            decode_text_strict(bytes, encoding).map_err(|source| GffBinaryError::TextDecoding {
640                context: format!("field[{field_index}] locstring[{substring_index}]"),
641                source,
642            })?;
643        substrings.push(GffLocalizedSubstring { string_id, text });
644        cursor += length;
645    }
646
647    if cursor != payload_end {
648        return Err(GffBinaryError::InvalidData(format!(
649            "locstring payload has {} trailing bytes",
650            payload_end - cursor
651        )));
652    }
653
654    Ok(GffLocalizedString {
655        string_ref,
656        substrings,
657    })
658}
659
660fn check_table_bounds(
661    total_len: usize,
662    offset: usize,
663    size: usize,
664    table_name: &str,
665) -> Result<(), GffBinaryError> {
666    binary::check_range_in_bounds(total_len, offset, size, table_name)?;
667    Ok(())
668}
669
670fn read_i32(bytes: &[u8], offset: usize) -> Result<i32, GffBinaryError> {
671    let bits = binary::read_u32(bytes, offset)?;
672    Ok(i32::from_le_bytes(bits.to_le_bytes()))
673}
674
675#[cfg(test)]
676mod tests {
677    use super::*;
678    use crate::gff::write_gff_to_vec;
679
680    const TEST_GFF: &[u8] = include_bytes!(concat!(
681        env!("CARGO_MANIFEST_DIR"),
682        "/../../fixtures/test.gff"
683    ));
684    const TEST_UTC: &[u8] = include_bytes!(concat!(
685        env!("CARGO_MANIFEST_DIR"),
686        "/../../fixtures/test.utc"
687    ));
688
689    #[test]
690    fn roundtrip_gff_binary_with_all_core_field_variants() {
691        let mut root = GffStruct::new(-1);
692        root.push_field("uint8", GffValue::UInt8(255));
693        root.push_field("int8", GffValue::Int8(-127));
694        root.push_field("uint16", GffValue::UInt16(65535));
695        root.push_field("int16", GffValue::Int16(-32768));
696        root.push_field("uint32", GffValue::UInt32(u32::MAX));
697        root.push_field("int32", GffValue::Int32(i32::MIN));
698        root.push_field("uint64", GffValue::UInt64(4_294_967_296));
699        root.push_field("int64", GffValue::Int64(2_147_483_647));
700        root.push_field("single", GffValue::Single(12.34567));
701        root.push_field("double", GffValue::Double(12.345678901234));
702        root.push_field("string", GffValue::String("abcdefghij123456789".into()));
703        root.push_field("resref", GffValue::resref_lit("resref01"));
704        root.push_field(
705            "locstring",
706            GffValue::LocalizedString(GffLocalizedString {
707                string_ref: StrRef::invalid(),
708                substrings: vec![
709                    GffLocalizedSubstring {
710                        string_id: 0,
711                        text: "male_eng".into(),
712                    },
713                    GffLocalizedSubstring {
714                        string_id: 5,
715                        text: "fem_german".into(),
716                    },
717                ],
718            }),
719        );
720        root.push_field("binary", GffValue::Binary(b"binarydata".to_vec()));
721        root.push_field("orientation", GffValue::Vector4([1.0, 2.0, 3.0, 4.0]));
722        root.push_field("position", GffValue::Vector3([11.0, 22.0, 33.0]));
723
724        let mut child = GffStruct::new(0);
725        child.push_field("child_uint8", GffValue::UInt8(4));
726        root.push_field("child_struct", GffValue::Struct(Box::new(child)));
727        root.push_field(
728            "list",
729            GffValue::List(vec![GffStruct::new(1), GffStruct::new(2)]),
730        );
731
732        let original = Gff::generic(root);
733        let bytes = write_gff_to_vec(&original).expect("write should succeed");
734        let parsed = read_gff_from_bytes(&bytes).expect("read should succeed");
735        assert_eq!(parsed, original);
736    }
737
738    #[test]
739    fn parses_gff_fixture() {
740        let gff = read_gff_from_bytes(TEST_GFF).expect("fixture should parse");
741        assert_eq!(gff.file_type, *b"GFF ");
742
743        assert_eq!(find_field(&gff.root, "uint8"), &GffValue::UInt8(255));
744        assert_eq!(find_field(&gff.root, "int8"), &GffValue::Int8(-127));
745        assert_eq!(find_field(&gff.root, "uint16"), &GffValue::UInt16(65535));
746        assert_eq!(find_field(&gff.root, "int16"), &GffValue::Int16(-32768));
747        assert_eq!(find_field(&gff.root, "uint32"), &GffValue::UInt32(u32::MAX));
748        assert_eq!(find_field(&gff.root, "int32"), &GffValue::Int32(i32::MIN));
749        assert_eq!(
750            find_field(&gff.root, "uint64"),
751            &GffValue::UInt64(4_294_967_296)
752        );
753        assert_eq!(
754            find_field(&gff.root, "string"),
755            &GffValue::String("abcdefghij123456789".into())
756        );
757        assert_eq!(
758            find_field(&gff.root, "resref"),
759            &GffValue::resref_lit("resref01")
760        );
761        match find_field(&gff.root, "locstring") {
762            GffValue::LocalizedString(loc) => {
763                assert_eq!(loc.string_ref, StrRef::invalid());
764                assert_eq!(loc.substrings.len(), 2);
765                assert_eq!(loc.substrings[0].text, "male_eng");
766                assert_eq!(loc.substrings[1].text, "fem_german");
767            }
768            other => panic!("expected localized string, got {other:?}"),
769        }
770    }
771
772    #[test]
773    fn read_write_roundtrip_preserves_fixture_semantics() {
774        let parsed = read_gff_from_bytes(TEST_GFF).expect("read should succeed");
775        let bytes = write_gff_to_vec(&parsed).expect("write should succeed");
776        let reparsed = read_gff_from_bytes(&bytes).expect("re-read should succeed");
777        assert_eq!(reparsed, parsed);
778    }
779
780    #[test]
781    fn writer_is_deterministic_for_parsed_fixture() {
782        let parsed = read_gff_from_bytes(TEST_GFF).expect("fixture should parse");
783        let first = write_gff_to_vec(&parsed).expect("first write should succeed");
784        let second = write_gff_to_vec(&parsed).expect("second write should succeed");
785        assert_eq!(first, second, "canonical GFF writer output drifted");
786    }
787
788    #[test]
789    fn roundtrip_preserves_list_order_and_struct_ids() {
790        let mut first = GffStruct::new(500);
791        first.push_field("marker", GffValue::UInt16(11));
792
793        let mut second = GffStruct::new(2);
794        second.push_field("marker", GffValue::UInt16(22));
795
796        let mut third = GffStruct::new(9_999);
797        third.push_field("marker", GffValue::UInt16(33));
798
799        let mut root = GffStruct::new(-1);
800        root.push_field("ordered", GffValue::List(vec![first, second, third]));
801
802        let gff = Gff::generic(root);
803        let bytes = write_gff_to_vec(&gff).expect("write should succeed");
804        let reparsed = read_gff_from_bytes(&bytes).expect("read should succeed");
805
806        let list = find_list(&reparsed.root, "ordered");
807        assert_eq!(list_struct_ids(list), vec![500, 2, 9_999]);
808        assert_eq!(list_u16_field(list, "marker"), vec![11, 22, 33]);
809    }
810
811    #[test]
812    fn utc_fixture_roundtrip_preserves_list_indices_and_values() {
813        let parsed = read_gff_from_bytes(TEST_UTC).expect("fixture should parse");
814        let bytes = write_gff_to_vec(&parsed).expect("write should succeed");
815        let reparsed = read_gff_from_bytes(&bytes).expect("re-read should succeed");
816
817        for label in ["FeatList", "Equip_ItemList", "ItemList", "ClassList"] {
818            assert_eq!(
819                list_struct_ids(find_list(&parsed.root, label)),
820                list_struct_ids(find_list(&reparsed.root, label)),
821                "list struct_id order changed for {label}"
822            );
823        }
824
825        assert_eq!(
826            list_u16_field(find_list(&parsed.root, "FeatList"), "Feat"),
827            list_u16_field(find_list(&reparsed.root, "FeatList"), "Feat")
828        );
829        assert_eq!(
830            list_resref_field(find_list(&parsed.root, "Equip_ItemList"), "EquippedRes"),
831            list_resref_field(find_list(&reparsed.root, "Equip_ItemList"), "EquippedRes")
832        );
833        assert_eq!(
834            list_resref_field(find_list(&parsed.root, "ItemList"), "InventoryRes"),
835            list_resref_field(find_list(&reparsed.root, "ItemList"), "InventoryRes")
836        );
837    }
838
839    #[test]
840    fn rejects_invalid_version() {
841        let mut bytes = vec![0_u8; GFF_HEADER_SIZE];
842        bytes[0..4].copy_from_slice(b"GFF ");
843        bytes[4..8].copy_from_slice(b"V9.9");
844        let err = read_gff_from_bytes(&bytes).expect_err("must fail");
845        assert!(matches!(err, GffBinaryError::InvalidVersion(_)));
846    }
847
848    #[test]
849    fn rejects_truncated_header() {
850        let bytes = vec![0_u8; GFF_HEADER_SIZE - 1];
851        let err = read_gff_from_bytes(&bytes).expect_err("must fail");
852        assert!(matches!(err, GffBinaryError::InvalidHeader(_)));
853    }
854
855    #[test]
856    fn rejects_unknown_field_type() {
857        let mut bytes = TEST_GFF.to_vec();
858        let field_offset = usize::try_from(u32::from_le_bytes(
859            bytes[16..20].try_into().expect("field offset bytes"),
860        ))
861        .expect("offset fits in usize");
862        bytes[field_offset..field_offset + 4].copy_from_slice(&99_u32.to_le_bytes());
863
864        let err = read_gff_from_bytes(&bytes).expect_err("must fail");
865        assert!(matches!(err, GffBinaryError::InvalidFieldType(99)));
866    }
867
868    #[test]
869    fn accepts_two_struct_gff_with_nested_list_entry() {
870        // Regression for a false-positive cycle-detection trip: when
871        // a GFF has only two structs total (root plus one list entry)
872        // the reader used to reject it with a bogus
873        // "detected nested struct cycle" error because the depth
874        // bound was set to `struct_count` (2) while reading the list
875        // entry's fields legitimately needs depth 3. About a quarter
876        // of vanilla K1 .uti blueprints hit this shape.
877        let mut entry = GffStruct::new(0);
878        entry.push_field("PropertyName", GffValue::UInt16(11));
879        entry.push_field("Subtype", GffValue::UInt16(5));
880
881        let mut root = GffStruct::new(-1);
882        root.push_field("PropertiesList", GffValue::List(vec![entry]));
883
884        let gff = Gff::generic(root);
885        let bytes = write_gff_to_vec(&gff).expect("write should succeed");
886        let parsed = read_gff_from_bytes(&bytes).expect("two-struct GFF should parse");
887
888        let GffValue::List(list) = find_field(&parsed.root, "PropertiesList") else {
889            panic!("PropertiesList should be a list");
890        };
891        assert_eq!(list.len(), 1);
892        assert_eq!(list[0].field("PropertyName"), Some(&GffValue::UInt16(11)));
893    }
894
895    #[test]
896    fn writer_rejects_unsupported_locstring_language_ids() {
897        let mut root = GffStruct::new(-1);
898        root.push_field(
899            "locstring",
900            GffValue::LocalizedString(GffLocalizedString {
901                string_ref: StrRef::invalid(),
902                substrings: vec![GffLocalizedSubstring {
903                    string_id: 140,
904                    text: "test".into(),
905                }],
906            }),
907        );
908        let gff = Gff::generic(root);
909
910        let err = write_gff_to_vec(&gff).expect_err("must fail");
911        assert!(matches!(
912            err,
913            GffBinaryError::UnsupportedLanguageEncoding(70)
914        ));
915    }
916
917    fn find_field<'a>(structure: &'a GffStruct, label: &str) -> &'a GffValue {
918        structure
919            .field(label)
920            .unwrap_or_else(|| panic!("missing field {label}"))
921    }
922
923    fn find_list<'a>(structure: &'a GffStruct, label: &str) -> &'a [GffStruct] {
924        match structure.field(label) {
925            Some(GffValue::List(values)) => values.as_slice(),
926            Some(other) => panic!("field {label} is not a list: {other:?}"),
927            None => panic!("missing list field {label}"),
928        }
929    }
930
931    fn list_struct_ids(list: &[GffStruct]) -> Vec<i32> {
932        list.iter().map(|entry| entry.struct_id).collect::<Vec<_>>()
933    }
934
935    fn list_u16_field(list: &[GffStruct], label: &str) -> Vec<u16> {
936        list.iter()
937            .map(|entry| match entry.field(label) {
938                Some(GffValue::UInt16(value)) => *value,
939                Some(other) => panic!("field {label} is not UInt16: {other:?}"),
940                None => panic!("missing field {label}"),
941            })
942            .collect::<Vec<_>>()
943    }
944
945    fn list_resref_field(list: &[GffStruct], label: &str) -> Vec<String> {
946        list.iter()
947            .map(|entry| match entry.field(label) {
948                Some(GffValue::ResRef(value)) => value.to_string(),
949                Some(other) => panic!("field {label} is not ResRef: {other:?}"),
950                None => panic!("missing field {label}"),
951            })
952            .collect::<Vec<_>>()
953    }
954}