tor_persist/
slug.rs

1//! "Slugs" used as part of on-disk filenames and other similar purposes
2//!
3//! Arti uses "slugs" as parts of filenames in many places.
4//! Slugs are fixed or variable strings which either
5//! designate the kind of a thing, or which of various things this is.
6//!
7//! Slugs have a restricted character set:
8//! Lowercase ASCII alphanumerics, underscore, hyphen.
9//! We may extend this to allow additional characters in the future,
10//! but /, +, and . (the slug separators) will never be valid slug characters.
11//! Additionally, : will never be a valid slug character,
//! because Windows does not allow colons in filenames[^1].
13//!
14//! Slugs may not be empty, and they may not start with a hyphen.
15//!
16//! Slugs can be concatenated to build file names.
17//! When concatenating slugs to make filenames,
18//! they should be separated using `/`, `+`, or `.`
19//! ([`SLUG_SEPARATOR_CHARS`]).
20//! Slugs should not be concatenated without separators (for security reasons).
21//!
22//! On Windows only, the following slugs are forbidden,
23//! because of [absurd Windows filename behaviours](https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file):
24//! `con` `prn` `aux` `nul`
25//! `com1` `com2` `com3` `com4` `com5` `com6` `com7` `com8` `com9` `com0`
26//! `lpt1` `lpt2` `lpt3` `lpt4` `lpt5` `lpt6` `lpt7` `lpt8` `lpt9` `lpt0`.
27//!
28//! [^1]: <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>
29
30pub mod timestamp;
31
32use std::borrow::Borrow;
33use std::ffi::OsStr;
34use std::fmt::{self, Display};
35use std::mem;
36use std::ops::Deref;
37use std::path::Path;
38
39use paste::paste;
40use serde::{Deserialize, Serialize};
41use thiserror::Error;
42
43#[cfg(target_family = "windows")]
44#[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
45pub use os::ForbiddenOnWindows;
46
/// An owned slug, checked for syntax
///
/// The syntax check can be relied on for safety/soundness.
///
/// Construct one with [`Slug::new`] (or the equivalent `TryFrom<String>` impl).
/// Serialization and deserialization go via `String`,
/// so deserializing a `Slug` goes through the same `TryFrom<String>` syntax check.
// We adopt this rule so that eventually we could have AsRef<[std::ascii::Char]>, etc.
#[derive(Debug, Clone, Serialize, Deserialize)] //
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
#[derive(derive_more::Display)]
#[serde(try_from = "String", into = "String")]
// Box<str> since we don't expect to change the size; that makes it 2 words rather than 3
// (But our public APIs are in terms of String.)
pub struct Slug(Box<str>);
58
/// A borrowed slug, checked for syntax
///
/// The syntax check can be relied on for safety/soundness.
///
/// This is an unsized wrapper around `str`, so it is only ever handled by reference
/// (`&SlugRef`); obtain one with [`SlugRef::new`].
#[derive(Debug, Serialize)] //
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
#[derive(derive_more::Display)]
#[serde(transparent)]
#[repr(transparent)] // SAFETY: this attribute is needed for unsafe in new_unchecked
pub struct SlugRef(str);
68
/// Characters which are good to use to separate slugs
///
/// Currently these are `/`, `+` and `.`.
///
/// Guaranteed to never overlap with the valid slug character set.
///
/// We might expand this set, but not ever reduce it.
pub const SLUG_SEPARATOR_CHARS: &str = "/+.";
75
/// Error for an invalid slug
///
/// The `Display` implementation is written by hand (see `impl Display for BadSlug`),
/// so the variants carry no `#[error(...)]` attributes.
#[derive(Error, Debug, Clone, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum BadSlug {
    /// Slug contains a forbidden character
    ///
    /// The payload is the offending character.
    BadCharacter(char),
    /// Slug starts with a disallowed character
    ///
    /// The payload is the offending first character.
    BadFirstCharacter(char),
    /// An empty slug was supplied where a nonempty one is required
    EmptySlugNotAllowed,
    /// We are on Windows and the slug is one of the forbidden ones
    ///
    /// On platforms other than Windows, this variant is absent.
    #[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
    #[cfg(target_family = "windows")]
    ForbiddenOnWindows(ForbiddenOnWindows),
}
93
/// Types which can perhaps be used as a slug
///
/// This is a trait implemented by `str`, `std::fmt::Arguments`,
/// and other implementors of `ToString`, for the convenience of call sites:
/// APIs can have functions taking an `&(impl TryIntoSlug + ?Sized)` or `&dyn TryIntoSlug`
/// and callers then don't need error-handling boilerplate.
///
/// Functions that take a `TryIntoSlug` will need to do a runtime syntax check.
pub trait TryIntoSlug {
    /// Convert `self` into a `Slug`, if it has the right syntax
    ///
    /// Returns a [`BadSlug`] error describing the problem otherwise.
    fn try_into_slug(&self) -> Result<Slug, BadSlug>;
}
106
107impl<T: ToString + ?Sized> TryIntoSlug for T {
108    fn try_into_slug(&self) -> Result<Slug, BadSlug> {
109        self.to_string().try_into()
110    }
111}
112
113impl Slug {
114    /// Make a Slug out of an owned `String`, if it has the correct syntax
115    pub fn new(s: String) -> Result<Slug, BadSlug> {
116        Ok(unsafe {
117            // SAFETY: we check, and then call new_unchecked
118            check_syntax(&s)?;
119            Slug::new_unchecked(s)
120        })
121    }
122
123    /// Make a Slug out of an owned `String`, without checking the syntax
124    ///
125    /// # Safety
126    ///
127    /// It's the caller's responsibility to check the syntax of the input string.
128    pub unsafe fn new_unchecked(s: String) -> Slug {
129        Slug(s.into())
130    }
131}
132
133impl SlugRef {
134    /// Make a SlugRef out of a `str`, if it has the correct syntax
135    pub fn new(s: &str) -> Result<&SlugRef, BadSlug> {
136        Ok(unsafe {
137            // SAFETY: we check, and then call new_unchecked
138            check_syntax(s)?;
139            SlugRef::new_unchecked(s)
140        })
141    }
142
143    /// Make a SlugRef out of a `str`, without checking the syntax
144    ///
145    /// # Safety
146    ///
147    /// It's the caller's responsibility to check the syntax of the input string.
148    pub unsafe fn new_unchecked<'s>(s: &'s str) -> &'s SlugRef {
149        unsafe {
150            // SAFETY
151            // SlugRef is repr(transparent).  So the alignment and memory layout
152            // are the same, and the pointer metadata is the same too.
153            // The lifetimes is correct by construction.
154            //
155            // We do this, rather than `struct SlugRef<'r>(&'r str)`,
156            // because that way we couldn't impl Deref.
157            mem::transmute::<&'s str, &'s SlugRef>(s)
158        }
159    }
160
161    /// Make an owned `Slug`
162    fn to_slug(&self) -> Slug {
163        unsafe {
164            // SAFETY: self is a SlugRef so our syntax is right
165            Slug::new_unchecked(self.0.into())
166        }
167    }
168}
169
170impl TryFrom<String> for Slug {
171    type Error = BadSlug;
172    fn try_from(s: String) -> Result<Slug, BadSlug> {
173        Slug::new(s)
174    }
175}
176
177impl From<Slug> for String {
178    fn from(s: Slug) -> String {
179        s.0.into()
180    }
181}
182
183impl<'s> TryFrom<&'s str> for &'s SlugRef {
184    type Error = BadSlug;
185    fn try_from(s: &'s str) -> Result<&'s SlugRef, BadSlug> {
186        SlugRef::new(s)
187    }
188}
189
190impl Deref for Slug {
191    type Target = SlugRef;
192    fn deref(&self) -> &SlugRef {
193        unsafe {
194            // SAFETY: self is a Slug so our syntax is right
195            SlugRef::new_unchecked(&self.0)
196        }
197    }
198}
199
200impl Borrow<SlugRef> for Slug {
201    fn borrow(&self) -> &SlugRef {
202        self
203    }
204}
205impl Borrow<str> for Slug {
206    fn borrow(&self) -> &str {
207        self.as_ref()
208    }
209}
210
211impl ToOwned for SlugRef {
212    type Owned = Slug;
213    fn to_owned(&self) -> Slug {
214        self.to_slug()
215    }
216}
217
/// Implement `fn as_...(&self) -> ...` and `AsRef`
///
/// Given a type name `$ty`, this generates an inherent method on `SlugRef`
/// whose name is `as_` followed by the snake-cased type name
/// (the identifier is assembled by `paste!`), returning `&$ty` via `AsRef`.
/// It then invokes `impl_as_ref!` to provide the `AsRef<$ty>` impls themselves.
macro_rules! impl_as_with_inherent { { $ty:ident } => { paste!{
    impl SlugRef {
        #[doc = concat!("Obtain this slug as a `", stringify!($ty), "`")]
        pub fn [<as_ $ty:snake>](&self) -> &$ty {
            self.as_ref()
        }
    }
    impl_as_ref!($ty);
} } }
/// Implement `AsRef`
///
/// Generates `AsRef<$ty>` for both `SlugRef` and `Slug`:
/// the `SlugRef` impl delegates to the inner `str`'s own `AsRef<$ty>`,
/// and the `Slug` impl delegates to the `SlugRef` impl via `Deref`.
macro_rules! impl_as_ref { { $ty:ty } => { paste!{
    impl AsRef<$ty> for SlugRef {
        fn as_ref(&self) -> &$ty {
            self.0.as_ref()
        }
    }
    impl AsRef<$ty> for Slug {
        fn as_ref(&self) -> &$ty {
            self.deref().as_ref()
        }
    }
} } }
241
// `str` and `Path` get an inherent accessor (`as_str`, `as_path`) as well as `AsRef`;
// `OsStr` and `[u8]` get the `AsRef` impls only.
impl_as_with_inherent!(str);
impl_as_with_inherent!(Path);
impl_as_ref!(OsStr);
impl_as_ref!([u8]);
246
247/// Check the string `s` to see if it would be valid as a slug
248///
249/// This is a low-level method for special cases.
250/// Usually, use [`Slug::new`] etc.
251//
252// SAFETY
253// This function checks the syntax, and is relied on by unsafe code
254#[allow(clippy::if_same_then_else)] // clippy objects to the repeated Ok(())
255pub fn check_syntax(s: &str) -> Result<(), BadSlug> {
256    if s.is_empty() {
257        return Err(BadSlug::EmptySlugNotAllowed);
258    }
259
260    // Slugs are not allowed to start with a hyphen.
261    if s.starts_with('-') {
262        return Err(BadSlug::BadFirstCharacter('-'));
263    }
264
265    // check legal character set
266    for c in s.chars() {
267        if c.is_ascii_lowercase() {
268            Ok(())
269        } else if c.is_ascii_digit() {
270            Ok(())
271        } else if c == '_' || c == '-' {
272            Ok(())
273        } else {
274            Err(BadSlug::BadCharacter(c))
275        }?;
276    }
277
278    os::check_forbidden(s)?;
279
280    Ok(())
281}
282
283impl Display for BadSlug {
284    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
285        match self {
286            BadSlug::BadCharacter(c) => {
287                let num = u32::from(*c);
288                write!(f, "character {c:?} (U+{num:04X}) is not allowed")
289            }
290            BadSlug::BadFirstCharacter(c) => {
291                let num = u32::from(*c);
292                write!(
293                    f,
294                    "character {c:?} (U+{num:04X}) is not allowed as the first character"
295                )
296            }
297            BadSlug::EmptySlugNotAllowed => {
298                write!(f, "empty identifier (empty slug) not allowed")
299            }
300            #[cfg(target_family = "windows")]
301            BadSlug::ForbiddenOnWindows(e) => os::fmt_error(e, f),
302        }
303    }
304}
305
/// Forbidden slug support for Windows
#[cfg(target_family = "windows")]
mod os {
    use super::*;

    /// A slug which is forbidden because we are on Windows (as found in an invalid slug error)
    ///
    /// This type is available only on Windows platforms.
    //
    // Double reference so that BadSlug has to contain only one word, not two
    pub type ForbiddenOnWindows = &'static &'static str;

    /// The forbidden slugs - windows thinks `C:\Program Files\lpt0.json` is a printer.
    const FORBIDDEN: &[&str] = &[
        "con", "prn", "aux", "nul", //
        "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "com0", //
        "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "lpt0",
    ];

    /// Check whether this slug is forbidden here
    ///
    /// On a match, the error refers to the entry in the static table, not to `s`.
    pub(super) fn check_forbidden(s: &str) -> Result<(), BadSlug> {
        match FORBIDDEN.iter().find(|bad| **bad == s) {
            Some(bad) => Err(BadSlug::ForbiddenOnWindows(bad)),
            None => Ok(()),
        }
    }

    /// Display a forbidden slug error
    pub(super) fn fmt_error(s: &ForbiddenOnWindows, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "slug (name) {s:?} is not allowed on Windows")
    }
}
340/// Forbidden slug support for non-Windows
341#[cfg(not(target_family = "windows"))]
342mod os {
343    use super::*;
344
345    /// Check whether this slug is forbidden here
346    #[allow(clippy::unnecessary_wraps)]
347    pub(super) fn check_forbidden(_s: &str) -> Result<(), BadSlug> {
348        Ok(())
349    }
350}
351
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::chain;

    // Separators and assorted junk characters must be rejected mid-slug,
    // with the same error from the borrowed and the owned constructor.
    #[test]
    fn bad() {
        let rejected = chain!(
            SLUG_SEPARATOR_CHARS.chars(), //
            ['\\', ' ', '\n', '\0']
        );
        for c in rejected {
            let s = format!("x{c}y");
            let borrowed_err = SlugRef::new(&s).unwrap_err();
            assert_eq!(borrowed_err, BadSlug::BadCharacter(c));
            let owned_err = Slug::new(s).unwrap_err();
            assert_eq!(borrowed_err, owned_err);
        }
    }

    // Every legal character is accepted, alone and all together,
    // and valid slugs display as the string they were made from.
    #[test]
    fn good() {
        /// Assert that `s` is accepted by both constructors and round-trips via `Display`
        fn roundtrips(s: &str) {
            let borrowed = SlugRef::new(s).unwrap();
            let owned = Slug::new(String::from(s)).unwrap();
            assert_eq!(borrowed.to_string(), s);
            assert_eq!(owned.to_string(), s);
        }

        let all = chain!(
            b'a'..=b'z', //
            b'0'..=b'9',
            [b'_'],
        )
        .map(char::from);

        let everything: String = all.clone().collect();
        roundtrips(&everything);

        for c in all {
            roundtrips(&c.to_string());
        }

        // Hyphens are allowed, but not as the first character
        roundtrips("a-");
        roundtrips("a-b");
    }

    // The error messages name the offending character and its code point.
    #[test]
    fn badchar_msg() {
        let cases = [
            (".", "character '.' (U+002E) is not allowed"),
            ("\0", "character '\\0' (U+0000) is not allowed"),
            (
                "\u{12345}",
                "character '\u{12345}' (U+12345) is not allowed",
            ),
            (
                "-",
                "character '-' (U+002D) is not allowed as the first character",
            ),
            ("A", "character 'A' (U+0041) is not allowed"),
        ];

        for (input, message) in cases {
            let rendered = SlugRef::new(input).unwrap_err().to_string();
            assert_eq!(rendered, message);
        }
    }

    // Reserved device names are rejected on Windows and accepted elsewhere.
    #[test]
    fn windows_forbidden() {
        for s in ["con", "prn", "lpt0"] {
            let outcome = SlugRef::new(s);
            if cfg!(target_family = "windows") {
                let rendered = outcome.unwrap_err().to_string();
                assert_eq!(
                    rendered,
                    format!("slug (name) \"{s}\" is not allowed on Windows"),
                );
            } else {
                assert_eq!(outcome.unwrap().as_str(), s);
            }
        }
    }

    // The empty string is never a valid slug.
    #[test]
    fn empty_slug() {
        let err = SlugRef::new("").unwrap_err();
        assert_eq!(
            err.to_string(),
            "empty identifier (empty slug) not allowed"
        );
    }
}
456}