rakata_formats/tlk/
reader.rs

1//! TLK binary reader.
2
3use std::io::Read;
4
5use rakata_core::{
6    decode_text_strict, text_encoding_for_language, LanguageId, ResRef, TextEncoding,
7};
8
9use crate::binary;
10
11use super::{
12    Tlk, TlkBinaryError, TlkEntry, ENTRY_SIZE, FILE_HEADER_SIZE, TLK_MAGIC, TLK_VERSION_V3,
13};
14
15/// Reads a TLK from a reader.
16///
17/// The stream is consumed from its current position.
18#[cfg_attr(
19    feature = "tracing",
20    tracing::instrument(level = "debug", skip(reader))
21)]
22pub fn read_tlk<R: Read>(reader: &mut R) -> Result<Tlk, TlkBinaryError> {
23    let mut bytes = Vec::new();
24    reader.read_to_end(&mut bytes)?;
25    read_tlk_from_bytes(&bytes)
26}
27
28/// Reads a TLK from in-memory bytes.
29#[cfg_attr(
30    feature = "tracing",
31    tracing::instrument(level = "debug", skip(bytes), fields(bytes_len = bytes.len()))
32)]
33pub fn read_tlk_from_bytes(bytes: &[u8]) -> Result<Tlk, TlkBinaryError> {
34    if bytes.len() < FILE_HEADER_SIZE {
35        return Err(TlkBinaryError::InvalidHeader(
36            "file smaller than TLK header".into(),
37        ));
38    }
39
40    let magic = binary::read_fourcc(bytes, 0)?;
41    binary::expect_fourcc(magic, TLK_MAGIC).map_err(TlkBinaryError::InvalidMagic)?;
42
43    let version = binary::read_fourcc(bytes, 4)?;
44    binary::expect_fourcc(version, TLK_VERSION_V3).map_err(TlkBinaryError::InvalidVersion)?;
45
46    let language_id = LanguageId::from_raw(binary::read_u32(bytes, 8)?);
47    let text_encoding = text_encoding_for_language(language_id)
48        .map_err(|err| TlkBinaryError::UnsupportedLanguageEncoding(err.language_id.raw()))?;
49    let entry_count = usize::try_from(binary::read_u32(bytes, 12)?)
50        .map_err(|_| TlkBinaryError::InvalidHeader("entry count does not fit usize".into()))?;
51    let entries_offset = usize::try_from(binary::read_u32(bytes, 16)?)
52        .map_err(|_| TlkBinaryError::InvalidHeader("entries offset does not fit usize".into()))?;
53
54    let entries_table_end = FILE_HEADER_SIZE
55        .checked_add(
56            entry_count
57                .checked_mul(ENTRY_SIZE)
58                .ok_or_else(|| TlkBinaryError::InvalidHeader("entry table size overflow".into()))?,
59        )
60        .ok_or_else(|| TlkBinaryError::InvalidHeader("entry table end overflow".into()))?;
61
62    if entries_offset < entries_table_end {
63        return Err(TlkBinaryError::InvalidHeader(
64            "string data offset overlaps entry headers".into(),
65        ));
66    }
67    if entries_offset > bytes.len() {
68        return Err(TlkBinaryError::InvalidHeader(
69            "string data offset beyond file size".into(),
70        ));
71    }
72
73    let mut tlk = Tlk::new(language_id);
74    tlk.entries.reserve(entry_count);
75
76    for index in 0..entry_count {
77        let base = FILE_HEADER_SIZE + index * ENTRY_SIZE;
78        let flags = binary::read_u32(bytes, base)?;
79
80        let sound_bytes = &bytes[base + 4..base + 20];
81        let sound_end = sound_bytes
82            .iter()
83            .position(|byte| *byte == 0)
84            .unwrap_or(sound_bytes.len());
85        let sound_name = decode_text_strict(&sound_bytes[..sound_end], TextEncoding::Windows1252)
86            .map_err(|source| TlkBinaryError::TextDecoding {
87            entry_index: index,
88            source,
89        })?;
90        let voiceover =
91            ResRef::new(&sound_name).map_err(|source| TlkBinaryError::InvalidSoundResRef {
92                entry_index: index,
93                value: sound_name,
94                source,
95            })?;
96
97        let volume_var = binary::read_u32(bytes, base + 20)?;
98        let pitch_var = binary::read_u32(bytes, base + 24)?;
99        let text_offset = usize::try_from(binary::read_u32(bytes, base + 28)?)
100            .map_err(|_| TlkBinaryError::InvalidHeader("text offset does not fit usize".into()))?;
101        let text_length = usize::try_from(binary::read_u32(bytes, base + 32)?)
102            .map_err(|_| TlkBinaryError::InvalidHeader("text length does not fit usize".into()))?;
103        let sound_length = binary::read_f32(bytes, base + 36)?;
104
105        let text_start = entries_offset
106            .checked_add(text_offset)
107            .ok_or_else(|| TlkBinaryError::InvalidHeader("text start overflow".into()))?;
108        let text_end = text_start
109            .checked_add(text_length)
110            .ok_or_else(|| TlkBinaryError::InvalidHeader("text end overflow".into()))?;
111        if text_end > bytes.len() {
112            return Err(TlkBinaryError::InvalidHeader(format!(
113                "text for entry {index} exceeds file length"
114            )));
115        }
116
117        let text =
118            decode_text_strict(&bytes[text_start..text_end], text_encoding).map_err(|source| {
119                TlkBinaryError::TextDecoding {
120                    entry_index: index,
121                    source,
122                }
123            })?;
124        tlk.entries.push(TlkEntry {
125            text,
126            voiceover,
127            text_present: (flags & 0x0001) != 0,
128            sound_present: (flags & 0x0002) != 0,
129            sound_length_present: (flags & 0x0004) != 0,
130            sound_length,
131            volume_var,
132            pitch_var,
133        });
134    }
135
136    Ok(tlk)
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tlk::write_tlk_to_vec;

    /// Two-entry table (language id 0) shared by the round-trip and
    /// determinism tests.
    fn synthetic_tlk() -> Tlk {
        let mut greeting = TlkEntry::new(
            "Hello \u{20ac} world",
            ResRef::new("n_gendro").expect("valid"),
        );
        greeting.sound_length_present = true;
        greeting.sound_length = 1.5;

        let mut table = Tlk::new(0);
        table.entries.push(greeting);
        table.entries.push(TlkEntry::new("Second", ResRef::blank()));
        table
    }

    /// Serializes `table` with the writer and parses the result back,
    /// panicking if either step fails.
    fn write_then_read(table: &Tlk) -> Tlk {
        let encoded = write_tlk_to_vec(table).expect("write should succeed");
        read_tlk_from_bytes(&encoded).expect("read should succeed")
    }

    #[test]
    fn roundtrip_tlk_binary() {
        let parsed = write_then_read(&synthetic_tlk());

        assert_eq!(parsed.language_id, LanguageId::from_raw(0));
        assert_eq!(parsed.entries.len(), 2);

        let head = &parsed.entries[0];
        assert_eq!(head.text, "Hello \u{20ac} world");
        assert_eq!(head.voiceover.as_str(), "n_gendro");
        assert!(head.sound_length_present);
        assert!((head.sound_length - 1.5).abs() < f32::EPSILON);
    }

    #[test]
    fn writer_is_deterministic_for_synthetic_tlk() {
        let table = synthetic_tlk();

        let first = write_tlk_to_vec(&table).expect("first write should succeed");
        let second = write_tlk_to_vec(&table).expect("second write should succeed");
        assert_eq!(first, second, "canonical TLK writer output drifted");
    }

    #[test]
    fn rejects_truncated_header() {
        // One byte short of a full header.
        let short = vec![0_u8; FILE_HEADER_SIZE - 1];
        let failure = read_tlk_from_bytes(&short).expect_err("must fail");
        assert!(matches!(failure, TlkBinaryError::InvalidHeader(_)));
    }

    #[test]
    fn rejects_invalid_magic() {
        let mut header = vec![0_u8; FILE_HEADER_SIZE];
        header[0..4].copy_from_slice(b"NOPE");
        header[4..8].copy_from_slice(&TLK_VERSION_V3);
        let failure = read_tlk_from_bytes(&header).expect_err("must fail");
        assert!(matches!(failure, TlkBinaryError::InvalidMagic(_)));
    }

    #[test]
    fn writer_normalizes_inconsistent_entry_flags() {
        // Flags deliberately disagree with the entry's actual content.
        let mut inconsistent = TlkEntry::new("Line", ResRef::blank());
        inconsistent.text_present = false;
        inconsistent.sound_present = true;
        inconsistent.sound_length_present = true;
        inconsistent.sound_length = 2.0;

        let mut table = Tlk::new(0);
        table.entries.push(inconsistent);

        let parsed = write_then_read(&table);
        let entry = &parsed.entries[0];
        assert!(entry.text_present);
        assert!(!entry.sound_present);
        assert!(!entry.sound_length_present);
        assert_eq!(entry.sound_length, 0.0);
    }

    #[test]
    fn writer_rejects_unencodable_text() {
        let mut table = Tlk::new(0);
        table
            .entries
            .push(TlkEntry::new("Unsupported \u{1f600}", ResRef::blank()));

        let failure = write_tlk_to_vec(&table).expect_err("write must fail");
        match failure {
            TlkBinaryError::TextEncoding {
                entry_index,
                source: cause,
            } => {
                assert_eq!(entry_index, 0);
                assert_eq!(cause.character, '\u{1f600}');
            }
            other => panic!("unexpected error variant: {other}"),
        }
    }

    #[test]
    fn roundtrip_tlk_binary_polish_cp1250() {
        let polish = "Za\u{17c}\u{f3}\u{142}\u{107} g\u{119}\u{15b}l\u{105} ja\u{17a}\u{144}";

        let mut table = Tlk::new(5);
        table.entries.push(TlkEntry::new(polish, ResRef::blank()));

        let parsed = write_then_read(&table);
        assert_eq!(parsed.language_id, LanguageId::from_raw(5));
        assert_eq!(parsed.entries[0].text, polish);
    }

    #[test]
    fn writer_rejects_unsupported_language_encoding() {
        let mut table = Tlk::new(70);
        table.entries.push(TlkEntry::new("abc", ResRef::blank()));

        let failure = write_tlk_to_vec(&table).expect_err("write must fail");
        assert!(matches!(
            failure,
            TlkBinaryError::UnsupportedLanguageEncoding(70)
        ));
    }

    #[test]
    fn roundtrip_tlk_binary_russian_cp1251() {
        let russian = "\u{41f}\u{440}\u{438}\u{432}\u{435}\u{442}";

        let mut table = Tlk::new(41);
        table.entries.push(TlkEntry::new(russian, ResRef::blank()));

        let parsed = write_then_read(&table);
        assert_eq!(parsed.language_id, LanguageId::from_raw(41));
        assert_eq!(parsed.entries[0].text, russian);
    }

    #[test]
    fn roundtrip_tlk_binary_japanese_shift_jis() {
        let japanese = "\u{30c6}\u{30b9}\u{30c8}";

        let mut table = Tlk::new(131);
        table.entries.push(TlkEntry::new(japanese, ResRef::blank()));

        let parsed = write_then_read(&table);
        assert_eq!(parsed.language_id, LanguageId::from_raw(131));
        assert_eq!(parsed.entries[0].text, japanese);
    }

    #[test]
    fn reader_rejects_unsupported_language_encoding() {
        // Header-only file: valid magic/version, language id 70, zero entries,
        // string data starting right after the header.
        let strings_offset =
            u32::try_from(FILE_HEADER_SIZE).expect("FILE_HEADER_SIZE fits in u32");

        let mut header = vec![0_u8; FILE_HEADER_SIZE];
        header[0..4].copy_from_slice(&TLK_MAGIC);
        header[4..8].copy_from_slice(&TLK_VERSION_V3);
        header[8..12].copy_from_slice(&70_u32.to_le_bytes());
        header[12..16].copy_from_slice(&0_u32.to_le_bytes());
        header[16..20].copy_from_slice(&strings_offset.to_le_bytes());

        let failure = read_tlk_from_bytes(&header).expect_err("read must fail");
        assert!(matches!(
            failure,
            TlkBinaryError::UnsupportedLanguageEncoding(70)
        ));
    }

    #[test]
    fn reader_rejects_malformed_multibyte_text() {
        let mut table = Tlk::new(131);
        table.entries.push(TlkEntry::new("ok", ResRef::blank()));

        let mut encoded = write_tlk_to_vec(&table).expect("write should succeed");
        let entry_base = FILE_HEADER_SIZE;

        // Recover where the writer placed the single entry's text.
        let raw_offset: [u8; 4] = encoded[entry_base + 28..entry_base + 32]
            .try_into()
            .expect("offset bytes");
        let text_offset = usize::try_from(u32::from_le_bytes(raw_offset)).expect("offset fits");

        // Overwrite the first text byte with 0x81 and shrink the text length
        // to one byte, so the reader sees just that lone byte.
        let blob_start = FILE_HEADER_SIZE + ENTRY_SIZE;
        encoded[blob_start + text_offset] = 0x81;
        encoded[entry_base + 32..entry_base + 36].copy_from_slice(&1_u32.to_le_bytes());

        let failure = read_tlk_from_bytes(&encoded).expect_err("read must fail");
        assert!(matches!(failure, TlkBinaryError::TextDecoding { .. }));
    }

    #[test]
    fn reader_rejects_invalid_sound_resref_token() {
        let mut table = Tlk::new(0);
        table
            .entries
            .push(TlkEntry::new("ok", ResRef::new("voice").expect("valid")));

        let mut encoded = write_tlk_to_vec(&table).expect("write should succeed");
        // First byte of the sound-name field of entry 0.
        let sound_name_start = FILE_HEADER_SIZE + 4;
        encoded[sound_name_start] = b'!';

        let failure = read_tlk_from_bytes(&encoded).expect_err("read must fail");
        assert!(matches!(
            failure,
            TlkBinaryError::InvalidSoundResRef { entry_index: 0, .. }
        ));
    }

    #[test]
    fn volume_var_and_pitch_var_survive_roundtrip() {
        let mut entry = TlkEntry::new("test", ResRef::blank());
        entry.volume_var = 0xDEADBEEF;
        entry.pitch_var = 0x12345678;

        let mut table = Tlk::new(0);
        table.entries.push(entry);

        let parsed = write_then_read(&table);
        let restored = &parsed.entries[0];
        assert_eq!(restored.volume_var, 0xDEADBEEF);
        assert_eq!(restored.pitch_var, 0x12345678);
    }
}