//! "Slugs" used as part of on-disk filenames and other similar purposes
//!
//! Arti uses "slugs" as parts of filenames in many places.
//! Slugs are fixed or variable strings which either
//! designate the kind of a thing, or which of various things this is.
//!
//! Slugs have a restricted character set:
//! Lowercase ASCII alphanumerics, underscore, hyphen.
//! We may extend this to allow additional characters in the future,
//! but `/`, `+`, and `.` (the slug separators) will never be valid slug characters.
//! Additionally, `:` will never be a valid slug character,
//! because Windows does not allow colons in filenames[^1].
//!
//! Slugs may not be empty, and they may not start with a hyphen.
//!
//! Slugs can be concatenated to build file names.
//! When concatenating slugs to make filenames,
//! they should be separated using `/`, `+`, or `.`
//! ([`SLUG_SEPARATOR_CHARS`]).
//! Slugs should not be concatenated without separators (for security reasons).
//!
//! On Windows only, the following slugs are forbidden,
//! because of [absurd Windows filename behaviours](https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file):
//! `con` `prn` `aux` `nul`
//! `com1` `com2` `com3` `com4` `com5` `com6` `com7` `com8` `com9` `com0`
//! `lpt1` `lpt2` `lpt3` `lpt4` `lpt5` `lpt6` `lpt7` `lpt8` `lpt9` `lpt0`.
//!
//! [^1]: <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>

            
30
pub mod timestamp;
31

            
32
use std::borrow::Borrow;
33
use std::ffi::OsStr;
34
use std::fmt::{self, Display};
35
use std::mem;
36
use std::ops::Deref;
37
use std::path::Path;
38

            
39
use paste::paste;
40
use serde::{Deserialize, Serialize};
41
use thiserror::Error;
42

            
43
#[cfg(target_family = "windows")]
44
#[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
45
pub use os::ForbiddenOnWindows;
46

            
47
/// An owned slug, checked for syntax
48
///
49
/// The syntax check can be relied on for safety/soundness.
50
// We adopt this rule so that eventually we could have AsRef<[std::ascii::Char]>, etc.
51
4
#[derive(Debug, Clone, Serialize, Deserialize)] //
52
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
53
#[derive(derive_more::Display)]
54
#[serde(try_from = "String", into = "String")]
55
// Box<str> since we don't expect to change the size; that makes it 2 words rather than 3
56
// (But our public APIs are in terms of String.)
57
pub struct Slug(Box<str>);
58

            
59
/// A borrwed slug, checked for syntax
60
///
61
/// The syntax check can be relied on for safety/soundness.
62
#[derive(Debug, Serialize)] //
63
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
64
#[derive(derive_more::Display)]
65
#[serde(transparent)]
66
#[repr(transparent)] // SAFETY: this attribute is needed for unsafe in new_unchecked
67
pub struct SlugRef(str);
68

            
/// Characters which are good to use to separate slugs
///
/// Guaranteed to never overlap with the valid slug character set.
///
/// We might expand this set, but not ever reduce it.
pub const SLUG_SEPARATOR_CHARS: &str = "/+.";

            
76
/// Error for an invalid slug
77
#[derive(Error, Debug, Clone, Eq, PartialEq, Hash)]
78
#[non_exhaustive]
79
pub enum BadSlug {
80
    /// Slug contains a forbidden character
81
    BadCharacter(char),
82
    /// Slug starts with a disallowed character
83
    BadFirstCharacter(char),
84
    /// An empty slug was supplied where a nonempty one is required
85
    EmptySlugNotAllowed,
86
    /// We are on Windows and the slug is one of the forbidden ones
87
    ///
88
    /// On platforms other than Windows, this variant is absent.
89
    #[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
90
    #[cfg(target_family = "windows")]
91
    ForbiddenOnWindows(ForbiddenOnWindows),
92
}
93

            
94
/// Types which can perhaps be used as a slug
95
///
96
/// This is a trait implemented by `str`, `std::fmt::Arguments`,
97
/// and other implementors of `ToString`, for the convenience of call sites:
98
/// APIs can have functions taking an `&(impl TryIntoSlug + ?Sized)` or `&dyn TryIntoSlug`
99
/// and callers then don't need error-handling boilerplate.
100
///
101
/// Functions that take a `TryIntoSlug` will need to do a runtime syntax check.
102
pub trait TryIntoSlug {
103
    /// Convert `self` into a `Slug`, if it has the right syntax
104
    fn try_into_slug(&self) -> Result<Slug, BadSlug>;
105
}
106

            
107
impl<T: ToString + ?Sized> TryIntoSlug for T {
108
102
    fn try_into_slug(&self) -> Result<Slug, BadSlug> {
109
102
        self.to_string().try_into()
110
102
    }
111
}
112

            
113
impl Slug {
114
    /// Make a Slug out of an owned `String`, if it has the correct syntax
115
50158
    pub fn new(s: String) -> Result<Slug, BadSlug> {
116
50158
        Ok(unsafe {
117
50158
            // SAFETY: we check, and then call new_unchecked
118
50158
            check_syntax(&s)?;
119
50085
            Slug::new_unchecked(s)
120
        })
121
50158
    }
122

            
123
    /// Make a Slug out of an owned `String`, without checking the syntax
124
    ///
125
    /// # Safety
126
    ///
127
    /// It's the caller's responsibility to check the syntax of the input string.
128
50387
    pub unsafe fn new_unchecked(s: String) -> Slug {
129
50387
        Slug(s.into())
130
50387
    }
131
}
132

            
133
impl SlugRef {
134
    /// Make a SlugRef out of a `str`, if it has the correct syntax
135
1880
    pub fn new(s: &str) -> Result<&SlugRef, BadSlug> {
136
1880
        Ok(unsafe {
137
1880
            // SAFETY: we check, and then call new_unchecked
138
1880
            check_syntax(s)?;
139
1854
            SlugRef::new_unchecked(s)
140
        })
141
1880
    }
142

            
143
    /// Make a SlugRef out of a `str`, without checking the syntax
144
    ///
145
    /// # Safety
146
    ///
147
    /// It's the caller's responsibility to check the syntax of the input string.
148
42702
    pub unsafe fn new_unchecked<'s>(s: &'s str) -> &'s SlugRef {
149
42702
        unsafe {
150
42702
            // SAFETY
151
42702
            // SlugRef is repr(transparent).  So the alignment and memory layout
152
42702
            // are the same, and the pointer metadata is the same too.
153
42702
            // The lifetimes is correct by construction.
154
42702
            //
155
42702
            // We do this, rather than `struct SlugRef<'r>(&'r str)`,
156
42702
            // because that way we couldn't impl Deref.
157
42702
            mem::transmute::<&'s str, &'s SlugRef>(s)
158
42702
        }
159
42702
    }
160

            
161
    /// Make an owned `Slug`
162
302
    fn to_slug(&self) -> Slug {
163
302
        unsafe {
164
302
            // SAFETY: self is a SlugRef so our syntax is right
165
302
            Slug::new_unchecked(self.0.into())
166
302
        }
167
302
    }
168
}
169

            
170
impl TryFrom<String> for Slug {
171
    type Error = BadSlug;
172
26544
    fn try_from(s: String) -> Result<Slug, BadSlug> {
173
26544
        Slug::new(s)
174
26544
    }
175
}
176

            
177
impl From<Slug> for String {
178
47
    fn from(s: Slug) -> String {
179
47
        s.0.into()
180
47
    }
181
}
182

            
183
impl<'s> TryFrom<&'s str> for &'s SlugRef {
184
    type Error = BadSlug;
185
    fn try_from(s: &'s str) -> Result<&'s SlugRef, BadSlug> {
186
        SlugRef::new(s)
187
    }
188
}
189

            
190
impl Deref for Slug {
191
    type Target = SlugRef;
192
40848
    fn deref(&self) -> &SlugRef {
193
40848
        unsafe {
194
40848
            // SAFETY: self is a Slug so our syntax is right
195
40848
            SlugRef::new_unchecked(&self.0)
196
40848
        }
197
40848
    }
198
}
199

            
200
impl Borrow<SlugRef> for Slug {
201
480
    fn borrow(&self) -> &SlugRef {
202
480
        self
203
480
    }
204
}
205
impl Borrow<str> for Slug {
206
    fn borrow(&self) -> &str {
207
        self.as_ref()
208
    }
209
}
210

            
211
impl ToOwned for SlugRef {
212
    type Owned = Slug;
213
302
    fn to_owned(&self) -> Slug {
214
302
        self.to_slug()
215
302
    }
216
}
217

            
/// Implement `fn as_...(&self) -> ...` and `AsRef`
///
/// `$ty` must be a bare identifier: its snake-case form names the inherent method
/// on `SlugRef` (`str` gives `as_str`, `Path` gives `as_path`).
/// The `AsRef` impls themselves come from `impl_as_ref!`.
macro_rules! impl_as_with_inherent { { $ty:ident } => { paste!{
    impl SlugRef {
        #[doc = concat!("Obtain this slug as a `", stringify!($ty), "`")]
        pub fn [<as_ $ty:snake>](&self) -> &$ty {
            self.as_ref()
        }
    }
    impl_as_ref!($ty);
} } }
/// Implement `AsRef`
///
/// Generates `AsRef<$ty>` for `SlugRef` (delegating to the inner `str`)
/// and for `Slug` (delegating to the `SlugRef` it dereferences to).
macro_rules! impl_as_ref { { $ty:ty } => { paste!{
    impl AsRef<$ty> for SlugRef {
        fn as_ref(&self) -> &$ty {
            self.0.as_ref()
        }
    }
    impl AsRef<$ty> for Slug {
        fn as_ref(&self) -> &$ty {
            self.deref().as_ref()
        }
    }
} } }

            
242
impl_as_with_inherent!(str);
243
impl_as_with_inherent!(Path);
244
impl_as_ref!(OsStr);
245
impl_as_ref!([u8]);
246

            
247
/// Check the string `s` to see if it would be valid as a slug
248
///
249
/// This is a low-level method for special cases.
250
/// Usually, use [`Slug::new`] etc.
251
//
252
// SAFETY
253
// This function checks the syntax, and is relied on by unsafe code
254
#[allow(clippy::if_same_then_else)] // clippy objects to the repeated Ok(())
255
135198
pub fn check_syntax(s: &str) -> Result<(), BadSlug> {
256
135198
    if s.is_empty() {
257
280
        return Err(BadSlug::EmptySlugNotAllowed);
258
134918
    }
259
134918

            
260
134918
    // Slugs are not allowed to start with a hyphen.
261
134918
    if s.starts_with('-') {
262
409
        return Err(BadSlug::BadFirstCharacter('-'));
263
134509
    }
264

            
265
    // check legal character set
266
1850061
    for c in s.chars() {
267
1850061
        if c.is_ascii_lowercase() {
268
1031338
            Ok(())
269
818723
        } else if c.is_ascii_digit() {
270
766337
            Ok(())
271
52386
        } else if c == '_' || c == '-' {
272
51491
            Ok(())
273
        } else {
274
895
            Err(BadSlug::BadCharacter(c))
275
895
        }?;
276
    }
277

            
278
133614
    os::check_forbidden(s)?;
279

            
280
133614
    Ok(())
281
135198
}
282

            
283
impl Display for BadSlug {
284
57
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
285
57
        match self {
286
53
            BadSlug::BadCharacter(c) => {
287
53
                let num = u32::from(*c);
288
53
                write!(f, "character {c:?} (U+{num:04X}) is not allowed")
289
            }
290
2
            BadSlug::BadFirstCharacter(c) => {
291
2
                let num = u32::from(*c);
292
2
                write!(
293
2
                    f,
294
2
                    "character {c:?} (U+{num:04X}) is not allowed as the first character"
295
2
                )
296
            }
297
            BadSlug::EmptySlugNotAllowed => {
298
2
                write!(f, "empty identifier (empty slug) not allowed")
299
            }
300
            #[cfg(target_family = "windows")]
301
            BadSlug::ForbiddenOnWindows(e) => os::fmt_error(e, f),
302
        }
303
57
    }
304
}
305

            
/// Forbidden slug support for Windows
#[cfg(target_family = "windows")]
mod os {
    use super::*;

    /// A slug which is forbidden because we are on Windows (as found in an invalid slug error)
    ///
    /// This type is available only on Windows platforms.
    //
    // Double reference so that BadSlug has to contain only one word, not two
    pub type ForbiddenOnWindows = &'static &'static str;

    /// The forbidden slugs - Windows thinks `C:\Program Files\lpt0.json` is a printer.
    const FORBIDDEN: &[&str] = &[
        "con", "prn", "aux", "nul", //
        "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "com0", //
        "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "lpt0",
    ];

    /// Check whether this slug is forbidden here
    ///
    /// The comparison is exact.  `check_syntax` calls this only after the character-set
    /// check, so `s` contains no uppercase letters and no `.` by then.
    pub(super) fn check_forbidden(s: &str) -> Result<(), BadSlug> {
        for bad in FORBIDDEN {
            if s == *bad {
                // `bad` points into the static table, which is what the error type wants.
                return Err(BadSlug::ForbiddenOnWindows(bad));
            }
        }
        Ok(())
    }

    /// Display a forbidden slug error
    pub(super) fn fmt_error(s: &ForbiddenOnWindows, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "slug (name) {s:?} is not allowed on Windows")
    }
}
340
/// Forbidden slug support for non-Windows
341
#[cfg(not(target_family = "windows"))]
342
mod os {
343
    use super::*;
344

            
345
    /// Check whether this slug is forbidden here
346
    #[allow(clippy::unnecessary_wraps)]
347
133614
    pub(super) fn check_forbidden(_s: &str) -> Result<(), BadSlug> {
348
133614
        Ok(())
349
133614
    }
350
}
351

            
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::chain;

    /// Separators and assorted other characters are rejected, identically by both constructors
    #[test]
    fn bad() {
        for c in chain!(
            SLUG_SEPARATOR_CHARS.chars(), //
            // ':' is promised by the module docs never to become a valid slug character
            ['\\', ' ', '\n', '\0', ':']
        ) {
            let s = format!("x{c}y");
            let e_ref = SlugRef::new(&s).unwrap_err();
            assert_eq!(e_ref, BadSlug::BadCharacter(c));
            let e_own = Slug::new(s).unwrap_err();
            assert_eq!(e_ref, e_own);
        }
    }

    /// Every legal character is accepted, and valid slugs round-trip through `Display`
    #[test]
    fn good() {
        let all = chain!(
            b'a'..=b'z', //
            b'0'..=b'9',
            [b'_'],
        )
        .map(char::from);

        let chk = |s: String| {
            let sref = SlugRef::new(&s).unwrap();
            let slug = Slug::new(s.clone()).unwrap();
            assert_eq!(sref.to_string(), s);
            assert_eq!(slug.to_string(), s);
        };

        chk(all.clone().collect());

        for c in all {
            chk(format!("{c}"));
        }

        // Hyphens are allowed, but not as the first character
        chk("a-".into());
        chk("a-b".into());
        assert_eq!(
            SlugRef::new("-a").unwrap_err(),
            BadSlug::BadFirstCharacter('-'),
        );
    }

    /// The error messages for bad characters have the expected text
    #[test]
    fn badchar_msg() {
        let chk = |s: &str, m: &str| {
            assert_eq!(
                SlugRef::new(s).unwrap_err().to_string(),
                m, //
            );
        };

        chk(".", "character '.' (U+002E) is not allowed");
        chk("\0", "character '\\0' (U+0000) is not allowed");
        chk(
            "\u{12345}",
            "character '\u{12345}' (U+12345) is not allowed",
        );
        chk(
            "-",
            "character '-' (U+002D) is not allowed as the first character",
        );
        chk("A", "character 'A' (U+0041) is not allowed");
    }

    /// Reserved device names are rejected on Windows and accepted elsewhere
    #[test]
    fn windows_forbidden() {
        for s in ["con", "prn", "lpt0"] {
            let r = SlugRef::new(s);
            if cfg!(target_family = "windows") {
                assert_eq!(
                    r.unwrap_err().to_string(),
                    format!("slug (name) \"{s}\" is not allowed on Windows"),
                );
            } else {
                assert_eq!(r.unwrap().as_str(), s);
            }
        }
    }

    /// The empty string is rejected with its own message
    #[test]
    fn empty_slug() {
        assert_eq!(
            SlugRef::new("").unwrap_err().to_string(),
            "empty identifier (empty slug) not allowed"
        );
    }
}