Skip to main content

rakata_core/
resref.rs

1use std::fmt::{Display, Formatter};
2use std::str::FromStr;
3use thiserror::Error;
4
5use crate::text::{decode_text, encode_text, EncodeTextError, TextEncoding};
6
/// Maximum number of bytes for a KotOR resource reference.
///
/// The limit applies to the encoded (Windows-1252) form of a name, not to
/// the UTF-8 byte count of the input string. It is also the size of the
/// inline buffer held by [`ResRef`].
pub const MAX_RESREF_LEN: usize = 16;
9
/// Error returned when constructing a [`ResRef`] fails validation.
///
/// Produced by [`ResRef::new`] and [`ResRef::const_new`], and therefore also
/// by the `FromStr` / `TryFrom` conversions that delegate to `new`.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum ResRefError {
    /// Input exceeded the maximum allowed resref length, measured in
    /// the Windows-1252 encoding the engine actually stores.
    #[error("resref length {len} exceeds maximum {max} (Windows-1252 bytes)")]
    TooLong {
        /// Actual input length in Windows-1252 bytes.
        len: usize,
        /// Maximum allowed length ([`MAX_RESREF_LEN`] at every
        /// construction site in this file).
        max: usize,
    },
    /// Input contained a Unicode character with no representation in
    /// the engine's Windows-1252 encoding (e.g., Chinese, emoji).
    ///
    /// [`ResRef::const_new`] also returns this variant for any non-ASCII
    /// input, even for characters Windows-1252 could encode, because it
    /// is restricted to ASCII.
    #[error("invalid resref character '{ch}': no Windows-1252 mapping")]
    InvalidChar {
        /// The unencodable character.
        ch: char,
    },
}
30
31impl From<EncodeTextError> for ResRefError {
32    fn from(err: EncodeTextError) -> Self {
33        Self::InvalidChar { ch: err.character }
34    }
35}
36
/// Canonicalized resource reference.
///
/// KotOR resource references are case-insensitive identifiers up to
/// 16 bytes long, stored as Windows-1252-encoded bytes (the engine's
/// native encoding). This type holds the bytes in an inline fixed-size
/// buffer, making it `Copy` and zero-allocation.
///
/// ## Validation rules
///
/// Accepts any input that round-trips through Windows-1252 encoding.
/// ASCII bytes pass straight through; non-ASCII chars that have a
/// Windows-1252 representation (`é`, `ü`, `£`, etc.) get transcoded
/// to their single-byte Windows-1252 form. Characters with no
/// Windows-1252 mapping (Chinese, emoji, etc.) are rejected.
///
/// The engine itself performs no character validation at all (verbatim
/// memcpy into a 16-byte buffer). Validation here exists to ensure we
/// only construct resrefs that can actually be stored in the
/// engine-native encoding. For the full engine audit, see the
/// **ResRef Validation** section of
/// `docs/src/formats/resource_system.md`.
///
/// ## Storage and access
///
/// Use [`Self::as_bytes`] for byte-level work (writing to disk,
/// hashing, lint inspection, byte-level comparison). For a string
/// view, use the `Display` impl (e.g. `format!("{resref}")` or
/// `resref.to_string()`); it decodes Windows-1252 → UTF-8 on demand.
///
/// ## Comparison
///
/// The constructors lowercase ASCII letters before storing, so two
/// resrefs built from inputs that differ only in ASCII case compare
/// equal and hash identically. The `PartialEq` impls against `str`,
/// `&str`, `[u8]` and `&[u8]` compare the stored canonical bytes with
/// the right-hand side as-is: the right-hand side is *not* lowercased,
/// so `ResRef::new("Module")? == "Module"` is `false` while
/// `== "module"` is `true`.
///
/// ## Serde
///
/// With the `serde` feature enabled, the type (de)serializes through
/// `String`: deserialization goes through `TryFrom<String>` and
/// serialization through `Into<String>`, as selected by the container
/// attributes below.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(try_from = "String", into = "String"))]
pub struct ResRef {
    /// Canonical Windows-1252 bytes. Only the first `len` bytes are
    /// meaningful; every constructor in this file starts from an
    /// all-zero buffer, so the tail stays zero.
    bytes: [u8; MAX_RESREF_LEN],
    /// Number of valid bytes in `bytes` (at most [`MAX_RESREF_LEN`]).
    len: u8,
}
72
73impl ResRef {
74    /// Creates and validates a new resource reference.
75    ///
76    /// The input is transcoded from UTF-8 to Windows-1252 (the engine's
77    /// native encoding); ASCII letters are lowercased for the
78    /// engine's case-insensitive lookup. Returns
79    /// [`ResRefError::InvalidChar`] when a character has no
80    /// Windows-1252 mapping (Chinese, emoji, etc.) and
81    /// [`ResRefError::TooLong`] when the encoded form exceeds 16 bytes.
82    /// An empty string produces a blank ResRef.
83    pub fn new(value: impl AsRef<str>) -> Result<Self, ResRefError> {
84        let encoded = encode_text(value.as_ref(), TextEncoding::Windows1252)?;
85        if encoded.len() > MAX_RESREF_LEN {
86            return Err(ResRefError::TooLong {
87                len: encoded.len(),
88                max: MAX_RESREF_LEN,
89            });
90        }
91        let mut bytes = [0u8; MAX_RESREF_LEN];
92        for (i, &b) in encoded.iter().enumerate() {
93            // Lowercase ASCII letters; leave Windows-1252 0x80+ bytes
94            // untouched (the engine's case folding for the extended
95            // range would need a Windows-1252 case table that we have
96            // not audited; vanilla content does not use these bytes).
97            bytes[i] = b.to_ascii_lowercase();
98        }
99        let len = u8::try_from(encoded.len()).expect("len already bounded by MAX_RESREF_LEN");
100        Ok(Self { bytes, len })
101    }
102
103    /// `const`-friendly version of [`Self::new`] for declaring
104    /// `pub const` resref constants at compile time.
105    ///
106    /// Restricted to ASCII-only input (Windows-1252 transcoding is
107    /// not const-friendly). Use [`Self::new`] for runtime input
108    /// that may contain extended Windows-1252 characters.
109    pub const fn const_new(value: &str) -> Result<Self, ResRefError> {
110        let raw = value.as_bytes();
111        if raw.len() > MAX_RESREF_LEN {
112            return Err(ResRefError::TooLong {
113                len: raw.len(),
114                max: MAX_RESREF_LEN,
115            });
116        }
117        let mut bytes = [0u8; MAX_RESREF_LEN];
118        let mut i = 0;
119        while i < raw.len() {
120            let b = raw[i];
121            if !b.is_ascii() {
122                // CLIPPY: char::from(u8) is not const-stable, and every
123                // u8 is a valid Unicode scalar value (latin-1 range) so
124                // the cast is sound.
125                #[allow(clippy::as_conversions)]
126                return Err(ResRefError::InvalidChar { ch: b as char });
127            }
128            bytes[i] = b.to_ascii_lowercase();
129            i += 1;
130        }
131        // CLIPPY: u8::try_from is not const-stable as of 1.85, and the
132        // length is already bounded above by MAX_RESREF_LEN = 16, so
133        // truncation cannot occur.
134        #[allow(clippy::as_conversions)]
135        let len = raw.len() as u8;
136        Ok(Self { bytes, len })
137    }
138
139    /// Returns an empty resource reference.
140    pub const fn blank() -> Self {
141        Self {
142            bytes: [0u8; MAX_RESREF_LEN],
143            len: 0,
144        }
145    }
146
147    /// Returns the canonical Windows-1252 bytes that make up this
148    /// resref.
149    ///
150    /// This is the engine-actual storage form. Use this for byte-level
151    /// work (writing to disk, hashing, lint inspection of byte
152    /// patterns, comparing against a byte-string literal).
153    pub fn as_bytes(&self) -> &[u8] {
154        &self.bytes[..usize::from(self.len)]
155    }
156
157    /// Returns `true` when the resource reference is empty.
158    pub const fn is_blank(&self) -> bool {
159        self.len == 0
160    }
161
162    /// Returns `true` when the resource reference is empty.
163    ///
164    /// Alias for [`is_blank`](Self::is_blank) matching the standard collection API.
165    pub const fn is_empty(&self) -> bool {
166        self.len == 0
167    }
168
169    /// Returns the length in Windows-1252 bytes.
170    ///
171    /// For ASCII-only resrefs (the overwhelming majority) this equals
172    /// the character count. For extended-character resrefs it is the
173    /// engine-storage byte count, not the UTF-8 byte count of the
174    /// Display form.
175    #[allow(clippy::as_conversions)]
176    pub const fn len(&self) -> usize {
177        // u8 to usize is a lossless widening; usize::from is not yet const-stable
178        // as of 1.93, so `as` is the only option in a const context.
179        self.len as usize
180    }
181}
182
183impl std::fmt::Debug for ResRef {
184    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
185        let decoded = decode_text(self.as_bytes(), TextEncoding::Windows1252);
186        write!(f, "ResRef({decoded:?})")
187    }
188}
189
190impl Default for ResRef {
191    fn default() -> Self {
192        Self::blank()
193    }
194}
195
196impl Display for ResRef {
197    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
198        let decoded = decode_text(self.as_bytes(), TextEncoding::Windows1252);
199        f.write_str(&decoded)
200    }
201}
202
203impl FromStr for ResRef {
204    type Err = ResRefError;
205
206    fn from_str(s: &str) -> Result<Self, Self::Err> {
207        Self::new(s)
208    }
209}
210
211impl TryFrom<&str> for ResRef {
212    type Error = ResRefError;
213
214    fn try_from(value: &str) -> Result<Self, Self::Error> {
215        Self::new(value)
216    }
217}
218
219impl TryFrom<String> for ResRef {
220    type Error = ResRefError;
221
222    fn try_from(value: String) -> Result<Self, Self::Error> {
223        Self::new(value)
224    }
225}
226
227impl From<ResRef> for String {
228    fn from(val: ResRef) -> Self {
229        val.to_string()
230    }
231}
232
// Convenience byte-comparison impls for assertions and lookups against
// string literals (`resref == "module"`) or byte slices
// (`resref == &b"module"[..]`). These compare the canonical Windows-1252
// bytes directly; the right-hand side is taken as-is and is NOT
// lowercased, so `"Module"` never matches a (lowercased) resref. For
// ASCII inputs (the overwhelming majority of resrefs) this matches both
// the engine semantics and what the caller intuitively expects. For an
// extended Windows-1252 byte (`é` = 0xE9), comparing against a UTF-8
// string literal (`"é"` = 0xC3 0xA9) returns false, which is the correct
// outcome since the byte sequences genuinely differ.
241impl PartialEq<str> for ResRef {
242    fn eq(&self, other: &str) -> bool {
243        self.as_bytes() == other.as_bytes()
244    }
245}
246
247impl PartialEq<&str> for ResRef {
248    fn eq(&self, other: &&str) -> bool {
249        self.as_bytes() == other.as_bytes()
250    }
251}
252
253impl PartialEq<ResRef> for str {
254    fn eq(&self, other: &ResRef) -> bool {
255        other == self
256    }
257}
258
259impl PartialEq<ResRef> for &str {
260    fn eq(&self, other: &ResRef) -> bool {
261        other == self
262    }
263}
264
265impl PartialEq<[u8]> for ResRef {
266    fn eq(&self, other: &[u8]) -> bool {
267        self.as_bytes() == other
268    }
269}
270
271impl PartialEq<&[u8]> for ResRef {
272    fn eq(&self, other: &&[u8]) -> bool {
273        self.as_bytes() == *other
274    }
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixed-case ASCII input is accepted and stored lowercased.
    #[test]
    fn accepts_valid_resref() {
        let resref = ResRef::new("P_Bastila").expect("valid resref");
        assert_eq!(resref.as_bytes(), b"p_bastila");
    }

    /// Anything past 16 encoded bytes is refused with `TooLong`.
    #[test]
    fn rejects_too_long_resref() {
        let outcome = ResRef::new("this_name_is_longer_than_sixteen");
        let err = outcome.expect_err("must fail");
        assert!(matches!(err, ResRefError::TooLong { .. }));
    }

    #[test]
    fn accepts_arbitrary_ascii_characters() {
        // There is no character whitelist in the engine (Ghidra-audited;
        // see docs/src/formats/resource_system.md), and vanilla content
        // relies on that: `+` shows up in chitin.key upgrade-modifier
        // resrefs, `!` in RIM key tables, and so on. Every single ASCII
        // byte must round-trip unchanged (these samples are already
        // lowercase, so canonicalization is a no-op).
        const SAMPLES: [&str; 6] = [
            "g_w_lghtsbr+1",
            "bad!name",
            "with space",
            "punct.dot",
            "amp&char",
            "t3-m4",
        ];
        for name in SAMPLES {
            match ResRef::new(name) {
                Ok(parsed) => assert_eq!(parsed.as_bytes(), name.as_bytes()),
                Err(e) => panic!("`{name}` should be valid (any ASCII byte allowed): {e:?}"),
            }
        }
    }

    #[test]
    fn accepts_extended_windows_1252_input() {
        // `é` is multi-byte in UTF-8 but a single byte (0xE9) in
        // Windows-1252, so the name is storable and must be accepted.
        let resref = ResRef::new("café").expect("é round-trips through Windows-1252");
        assert_eq!(resref.as_bytes(), b"caf\xE9");
    }

    #[test]
    fn rejects_unencodable_input() {
        // CJK, emoji and friends have no Windows-1252 mapping and so
        // cannot live in the engine's byte buffer; the API boundary is
        // where they get rejected.
        let outcome = ResRef::new("名前");
        let err = outcome.expect_err("CJK has no Windows-1252 mapping");
        assert!(matches!(err, ResRefError::InvalidChar { .. }));
    }

    #[test]
    fn const_new_rejects_non_ascii_input() {
        // `const_new` is ASCII-only by design (no const Windows-1252
        // transcoding), so even a Windows-1252-encodable char like `é`
        // makes it return an error.
        let outcome = ResRef::const_new("café");
        let err = outcome.expect_err("non-ASCII rejected at compile time");
        assert!(matches!(err, ResRefError::InvalidChar { .. }));
    }

    /// The blank resref reports itself as empty through every accessor.
    #[test]
    fn blank_is_empty() {
        let blank = ResRef::blank();
        assert!(blank.is_blank());
        assert_eq!(blank.as_bytes(), b"");
        assert_eq!(blank.len(), 0);
    }

    /// Exactly 16 bytes is still within the limit.
    #[test]
    fn max_length_accepted() {
        let resref = ResRef::new("a23456789_123456").expect("16 chars is valid");
        assert_eq!(resref.as_bytes(), b"a23456789_123456");
        assert_eq!(resref.len(), 16);
    }

    /// Compile-time proof that `ResRef` is `Copy`.
    #[test]
    fn is_copy() {
        fn takes_copy<T: Copy>(_: T) {}
        takes_copy(ResRef::blank());
    }

    /// `ResRef` → `String` → `ResRef` yields the value we started with.
    #[test]
    fn roundtrip_through_string() {
        let original = ResRef::new("test_resref").expect("valid");
        let text = String::from(original);
        assert_eq!(text, "test_resref");
        let restored = ResRef::try_from(text).expect("valid");
        assert_eq!(restored, original);
    }

    #[test]
    fn const_new_accepts_valid_resref() {
        let resref = ResRef::const_new("itempropdef").expect("valid");
        assert_eq!(resref.as_bytes(), b"itempropdef");
    }

    #[test]
    fn const_new_lowercases_uppercase_input() {
        let resref = ResRef::const_new("ItemPropDef").expect("valid");
        assert_eq!(resref.as_bytes(), b"itempropdef");
    }

    #[test]
    fn const_new_rejects_too_long() {
        let outcome = ResRef::const_new("this_name_is_longer_than_sixteen");
        let err = outcome.expect_err("must fail");
        assert!(matches!(err, ResRefError::TooLong { .. }));
    }

    /// For ASCII input both constructors must agree byte for byte.
    #[test]
    fn const_new_matches_runtime_new_for_valid_inputs() {
        const SAMPLES: [&str; 4] = ["appearance", "Hk-47", "iprp_damagecost", "T3-M4"];
        for name in SAMPLES {
            let compile_time = ResRef::const_new(name).expect("valid");
            let runtime = ResRef::new(name).expect("valid");
            assert_eq!(runtime, compile_time, "mismatch for `{name}`");
        }
    }

    #[test]
    fn const_new_can_be_called_in_const_context() {
        // This is what `const_new` exists for: if it were not a usable
        // `const fn`, the `const` item below would not compile.
        const APPEARANCE: Result<ResRef, ResRefError> = ResRef::const_new("appearance");
        let resref = APPEARANCE.expect("compile-time-valid");
        assert_eq!(resref.as_bytes(), b"appearance");
    }
}