tor_config/
sources.rs

1//! `ConfigurationSources`: Helper for handling configuration files
2//!
3//! This module provides [`ConfigurationSources`].
4//!
5//! This layer brings together the functionality of
6//! our underlying configuration library,
7//! [`fs_mistrust`] and [`tor_config::cmdline`](crate::cmdline).
8//!
9//! A `ConfigurationSources` records a set of filenames of TOML files,
10//! ancillary instructions for reading them,
11//! and also a set of command line options.
12//!
13//! Usually, call [`ConfigurationSources::from_cmdline`],
14//! perhaps [`set_mistrust`](ConfigurationSources::set_mistrust),
15//! and finally [`load`](ConfigurationSources::load).
16//! The resulting [`ConfigurationTree`] can then be deserialized.
17//!
18//! If you want to watch for config file changes,
19//! use [`ConfigurationSources::scan()`],
20//! to obtain a [`FoundConfigFiles`],
21//! start watching the paths returned by [`FoundConfigFiles::iter()`],
22//! and then call [`FoundConfigFiles::load()`].
23//! (This ordering starts watching the files before you read them,
24//! which is necessary to avoid possibly missing changes.)
25
26use std::ffi::OsString;
27use std::{fs, io, sync::Arc};
28
29use figment::Figment;
30use void::ResultVoidExt as _;
31
32use crate::err::ConfigError;
33use crate::{CmdLine, ConfigurationTree};
34
35use std::path::{Path, PathBuf};
36
37/// A description of where to find our configuration options.
38#[derive(Clone, Debug, Default)]
39pub struct ConfigurationSources {
40    /// List of files to read (in order).
41    files: Vec<(ConfigurationSource, MustRead)>,
42    /// A list of command-line options to apply after parsing the files.
43    options: Vec<String>,
44    /// We will check all files we read
45    mistrust: fs_mistrust::Mistrust,
46}
47
/// Policy for what to do when a configuration file cannot be found.
///
/// Default configuration files may legitimately be missing, so skipping
/// them is fine.  Files that were named explicitly (for instance on the
/// command line) are expected to exist, and their absence is an error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_enums)]
pub enum MustRead {
    /// It is acceptable to skip this file if it is not present.
    TolerateAbsence,

    /// This file has to be present and readable.
    MustRead,
}
62
/// One source of configuration, as used by a `ConfigurationSources`
///
/// To build one from a `PathBuf`, interpreting its syntax the way `arti` does,
/// use `ConfigurationSource::from_path`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(clippy::exhaustive_enums)]
pub enum ConfigurationSource {
    /// The path of a single plain file
    File(PathBuf),

    /// The path of a directory
    Dir(PathBuf),

    /// TOML text held in memory and used verbatim
    Verbatim(Arc<String>),
}
79
80impl ConfigurationSource {
81    /// Interpret a path (or string) as a configuration file or directory spec
82    ///
83    /// If the path syntactically specifies a directory
84    /// (i.e., can be seen to be a directory without accessing the filesystem,
85    /// for example because it ends in a directory separator such as `/`)
86    /// it is treated as specifying a directory.
87    pub fn from_path<P: Into<PathBuf>>(p: P) -> ConfigurationSource {
88        use ConfigurationSource as CS;
89        let p = p.into();
90        if is_syntactically_directory(&p) {
91            CS::Dir(p)
92        } else {
93            CS::File(p)
94        }
95    }
96
97    /// Use the provided text as verbatim TOML, as if it had been read from disk.
98    pub fn from_verbatim(text: String) -> ConfigurationSource {
99        Self::Verbatim(Arc::new(text))
100    }
101
102    /// Return a reference to the inner `Path`, if there is one.
103    pub fn as_path(&self) -> Option<&Path> {
104        use ConfigurationSource as CS;
105        match self {
106            CS::File(p) | CS::Dir(p) => Some(p),
107            CS::Verbatim(_) => None,
108        }
109    }
110}
111
/// Configuration files and directories we found in the filesystem
///
/// Result of [`ConfigurationSources::scan`].
///
/// When loading configuration files and also watching for filesystem updates,
/// this type encapsulates all the actual filesystem objects that need watching.
///
/// It borrows the [`ConfigurationSources`] it was produced from,
/// and so cannot outlive it.
#[derive(Debug)]
pub struct FoundConfigFiles<'srcs> {
    /// The things we found
    ///
    /// This includes both:
    ///  * Files which ought to be read
    ///  * Directories, which may or may not contain any currently-relevant files
    ///
    /// The directories are retained for the purpose of watching for config changes:
    /// we will want to detect files being created within them,
    /// so our caller needs to discover them (via [`FoundConfigFiles::iter()`]).
    ///
    /// Verbatim (in-memory) sources are carried along in this list too,
    /// so that they are applied in their proper order when loading.
    files: Vec<FoundConfigFile>,

    /// Our parent, which contains details we need for `load`
    ///
    /// Specifically, loading consults its mistrust settings
    /// and its command-line options.
    sources: &'srcs ConfigurationSources,
}
134
135/// A configuration source file or directory, found or not found on the filesystem
136#[derive(Debug, Clone)]
137struct FoundConfigFile {
138    /// The path of the (putative) object
139    source: ConfigurationSource,
140
141    /// Were we expecting this to definitely exist
142    must_read: MustRead,
143}
144
145impl ConfigurationSources {
146    /// Create a new empty [`ConfigurationSources`].
147    pub fn new_empty() -> Self {
148        Self::default()
149    }
150
151    /// Establish a [`ConfigurationSources`] the from an infallible command line and defaults
152    ///
153    /// Convenience method for if the default config file location(s) can be infallibly computed.
154    pub fn from_cmdline<F, O>(
155        default_config_files: impl IntoIterator<Item = ConfigurationSource>,
156        config_files_options: impl IntoIterator<Item = F>,
157        cmdline_toml_override_options: impl IntoIterator<Item = O>,
158    ) -> Self
159    where
160        F: Into<PathBuf>,
161        O: Into<String>,
162    {
163        ConfigurationSources::try_from_cmdline(
164            || Ok(default_config_files),
165            config_files_options,
166            cmdline_toml_override_options,
167        )
168        .void_unwrap()
169    }
170
171    /// Establish a [`ConfigurationSources`] the usual way from a command line and defaults
172    ///
173    /// The caller should have parsed the program's command line, and extracted (inter alia)
174    ///
175    ///  * `config_files_options`: Paths of config file(s) (or directories of `.toml` files)
176    ///  * `cmdline_toml_override_options`: Overrides ("key=value")
177    ///
178    /// The caller should also provide `default_config_files`,
179    /// which returns the default locations of the configuration files.
180    /// This used if no file(s) are specified on the command line.
181    //
182    // The other inputs are always used and therefore
183    // don't need to be lifted into FnOnce() -> Result.
184    ///
185    /// `mistrust` is used to check whether the configuration files have appropriate permissions.
186    ///
187    /// `ConfigurationSource::Dir`s
188    /// will be scanned for files whose name ends in `.toml`.
189    /// All those files (if any) will be read (in lexical order by filename).
190    pub fn try_from_cmdline<F, O, DEF, E>(
191        default_config_files: impl FnOnce() -> Result<DEF, E>,
192        config_files_options: impl IntoIterator<Item = F>,
193        cmdline_toml_override_options: impl IntoIterator<Item = O>,
194    ) -> Result<Self, E>
195    where
196        F: Into<PathBuf>,
197        O: Into<String>,
198        DEF: IntoIterator<Item = ConfigurationSource>,
199    {
200        let mut cfg_sources = ConfigurationSources::new_empty();
201
202        let mut any_files = false;
203        for f in config_files_options {
204            let f = f.into();
205            cfg_sources.push_source(ConfigurationSource::from_path(f), MustRead::MustRead);
206            any_files = true;
207        }
208        if !any_files {
209            for default in default_config_files()? {
210                cfg_sources.push_source(default, MustRead::TolerateAbsence);
211            }
212        }
213
214        for s in cmdline_toml_override_options {
215            cfg_sources.push_option(s);
216        }
217
218        Ok(cfg_sources)
219    }
220
221    /// Add `src` to the list of files or directories that we want to read configuration from.
222    ///
223    /// Configuration files are loaded and applied in the order that they are
224    /// added to this object.
225    ///
226    /// If the listed file is absent, loading the configuration won't succeed.
227    pub fn push_source(&mut self, src: ConfigurationSource, must_read: MustRead) {
228        self.files.push((src, must_read));
229    }
230
231    /// Add `s` to the list of overridden options to apply to our configuration.
232    ///
233    /// Options are applied after all configuration files are loaded, in the
234    /// order that they are added to this object.
235    ///
236    /// The format for `s` is as in [`CmdLine`].
237    pub fn push_option(&mut self, option: impl Into<String>) {
238        self.options.push(option.into());
239    }
240
241    /// Sets the filesystem permission mistrust
242    ///
243    /// This value only indicates whether and how to check permissions
244    /// on the configuration file itself.
245    /// It *does not* specify whether and how to check permissions of the
246    /// paths provided within.
247    /// This is defined by the `storage.permissions.dangerously_trust_everyone` flag.
248    pub fn set_mistrust(&mut self, mistrust: fs_mistrust::Mistrust) {
249        self.mistrust = mistrust;
250    }
251
252    /// Reads the filesystem permission mistrust
253    ///
254    /// This value only indicates whether and how to check permissions
255    /// on the configuration file itself.
256    /// It *does not* specify whether and how to check permissions of the
257    /// paths provided within.
258    /// This is defined by the `storage.permissions.dangerously_trust_everyone` flag.
259    pub fn mistrust(&self) -> &fs_mistrust::Mistrust {
260        &self.mistrust
261    }
262
263    /// Scan for files and load the configuration into a new [`ConfigurationTree`].
264    ///
265    /// This is a convenience method for [`scan()`](Self::scan)
266    /// followed by [`files.load`].
267    pub fn load(&self) -> Result<ConfigurationTree, ConfigError> {
268        let files = self.scan()?;
269        files.load()
270    }
271
272    /// Scan for configuration source files (including scanning any directories)
273    pub fn scan(&self) -> Result<FoundConfigFiles, ConfigError> {
274        let mut out = vec![];
275
276        for &(ref source, must_read) in &self.files {
277            let required = must_read == MustRead::MustRead;
278
279            // Returns Err(error) if we should bail,
280            // or Ok(()) if we should ignore the error and skip the file.
281            let handle_io_error = |e: io::Error, p: &Path| {
282                if e.kind() == io::ErrorKind::NotFound && !required {
283                    Result::<_, crate::ConfigError>::Ok(())
284                } else {
285                    Err(crate::ConfigError::Io {
286                        action: "reading",
287                        path: p.to_owned(),
288                        err: Arc::new(e),
289                    })
290                }
291            };
292
293            use ConfigurationSource as CS;
294            match &source {
295                CS::Dir(dirname) => {
296                    let dir = match fs::read_dir(dirname) {
297                        Ok(y) => y,
298                        Err(e) => {
299                            handle_io_error(e, dirname.as_ref())?;
300                            continue;
301                        }
302                    };
303                    out.push(FoundConfigFile {
304                        source: source.clone(),
305                        must_read,
306                    });
307                    // Rebinding `found` avoids using the directory name by mistake.
308                    let mut entries = vec![];
309                    for found in dir {
310                        // reuse map_io_err, which embeds the directory name,
311                        // since if we have Err we don't have an entry name.
312                        let found = match found {
313                            Ok(y) => y,
314                            Err(e) => {
315                                handle_io_error(e, dirname.as_ref())?;
316                                continue;
317                            }
318                        };
319                        let leaf = found.file_name();
320                        let leaf: &Path = leaf.as_ref();
321                        match leaf.extension() {
322                            Some(e) if e == "toml" => {}
323                            _ => continue,
324                        }
325                        entries.push(found.path());
326                    }
327                    entries.sort();
328                    out.extend(entries.into_iter().map(|path| FoundConfigFile {
329                        source: CS::File(path),
330                        must_read: MustRead::TolerateAbsence,
331                    }));
332                }
333                CS::File(_) | CS::Verbatim(_) => {
334                    out.push(FoundConfigFile {
335                        source: source.clone(),
336                        must_read,
337                    });
338                }
339            }
340        }
341
342        Ok(FoundConfigFiles {
343            files: out,
344            sources: self,
345        })
346    }
347}
348
349impl FoundConfigFiles<'_> {
350    /// Iterate over the filesystem objects that the scan found
351    //
352    // This ought really to be `impl IntoIterator for &Self` but that's awkward without TAIT
353    pub fn iter(&self) -> impl Iterator<Item = &ConfigurationSource> {
354        self.files.iter().map(|f| &f.source)
355    }
356
357    /// Add every file and commandline source to `builder`, returning a new
358    /// builder.
359    fn add_sources(self, mut builder: Figment) -> Result<Figment, ConfigError> {
360        use figment::providers::Format;
361
362        // Note that we're using `merge` here.  It causes later sources' options
363        // to replace those in earlier sources, and causes arrays to be replaced
364        // rather than extended.
365        //
366        // TODO #1337: This array behavior is not necessarily ideal for all
367        // cases, but doing something smarter would probably require us to hack
368        // figment-rs or toml.
369
370        for FoundConfigFile { source, must_read } in self.files {
371            use ConfigurationSource as CS;
372
373            let required = must_read == MustRead::MustRead;
374
375            let file = match source {
376                CS::File(file) => file,
377                CS::Dir(_) => continue,
378                CS::Verbatim(text) => {
379                    builder = builder.merge(figment::providers::Toml::string(&text));
380                    continue;
381                }
382            };
383
384            match self
385                .sources
386                .mistrust
387                .verifier()
388                .permit_readable()
389                .check(&file)
390            {
391                Ok(()) => {}
392                Err(fs_mistrust::Error::NotFound(_)) if !required => {
393                    continue;
394                }
395                Err(e) => return Err(ConfigError::FileAccess(e)),
396            }
397
398            // We use file_exact here so that figment won't look in parent
399            // directories if the target file can't be found.
400            let f = figment::providers::Toml::file_exact(file);
401            builder = builder.merge(f);
402        }
403
404        let mut cmdline = CmdLine::new();
405        for opt in &self.sources.options {
406            cmdline.push_toml_line(opt.clone());
407        }
408        builder = builder.merge(cmdline);
409
410        Ok(builder)
411    }
412
413    /// Load the configuration into a new [`ConfigurationTree`].
414    pub fn load(self) -> Result<ConfigurationTree, ConfigError> {
415        let mut builder = Figment::new();
416        builder = self.add_sources(builder)?;
417
418        Ok(ConfigurationTree(builder))
419    }
420}
421
/// Can `p` be seen to name a directory, just from its syntax?
///
/// True if it ends in a path separator, or if its final component is
/// a root, a prefix, `.` or `..`.  The filesystem is never consulted.
fn is_syntactically_directory(p: &Path) -> bool {
    use std::path::Component;

    let last = match p.components().next_back() {
        Some(component) => component,
        // An empty path names nothing at all.
        None => return false,
    };

    // Prefix, root, `.` and `..` can only ever refer to directories.
    if !matches!(last, Component::Normal(_)) {
        return true;
    }

    // The final component is an ordinary name, so the question is whether
    // the path ends in a separator.  The stdlib won't tell us that directly,
    // and components() normalises away any trailing empty component.
    //
    // Instead, glue an ordinary character onto the raw path.  If the path
    // ended in a separator, that character forms a new component; if not,
    // it merely lengthens the last one.  This works whatever the separator
    // is, on any platform where paths naming directories are like those for
    // files.  (It would even work on some others, eg VMS.)
    let extended = {
        let mut raw = OsString::from(p);
        raw.push("a");
        PathBuf::from(raw)
    };

    extended.components().count() != p.components().count()
}
447
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::Itertools;
    use tempfile::tempdir;

    /// Example config: sets both `hello.world` and `hello.friends`.
    static EX_TOML: &str = "
[hello]
world = \"stuff\"
friends = 4242
";

    /// Second example config: overrides `hello.world` only.
    static EX2_TOML: &str = "
[hello]
world = \"nonsense\"
";

    /// Build a ConfigurationSources from explicit files and options only
    /// (so without the arti defaults), trusting everyone's permissions.
    fn sources_nodefaults<P: AsRef<Path>>(
        files: &[(P, MustRead)],
        opts: &[String],
    ) -> ConfigurationSources {
        let mut sources = ConfigurationSources::new_empty();
        sources.set_mistrust(fs_mistrust::Mistrust::new_dangerously_trust_everyone());
        for (path, must_read) in files {
            sources.push_source(ConfigurationSource::from_path(path.as_ref()), *must_read);
        }
        for opt in opts {
            sources.push_option(opt.clone());
        }
        sources
    }

    /// Scan and load the given files and option strings,
    /// without taking the arti defaults into account.
    fn load_nodefaults<P: AsRef<Path>>(
        files: &[(P, MustRead)],
        opts: &[String],
    ) -> Result<ConfigurationTree, crate::ConfigError> {
        let sources = sources_nodefaults(files, opts);
        sources.load()
    }

    #[test]
    fn non_required_file() {
        let tmp = tempdir().unwrap();
        let absent = tmp.path().join("a_file");
        // A missing file is fine, so long as it wasn't mandatory.
        load_nodefaults(&[(absent, MustRead::TolerateAbsence)], &[]).unwrap();
    }

    #[test]
    fn both_required_and_not() {
        let tmp = tempdir().unwrap();
        let absent = tmp.path().join("a_file");
        let present = tmp.path().join("other_file");
        std::fs::write(&present, EX2_TOML).unwrap();

        let cfg = load_nodefaults(
            &[
                (absent, MustRead::TolerateAbsence),
                (present, MustRead::MustRead),
            ],
            &[],
        )
        .unwrap();

        assert!(cfg.get_string("hello.friends").is_err());
        assert_eq!(cfg.get_string("hello.world").unwrap(), "nonsense");
    }

    #[test]
    fn dir_with_some() {
        let tmp = tempdir().unwrap();
        let base = tmp.path();
        let top_file = base.join("1.toml");
        let extra_dir = base.join("extra.d/");
        let missing_dir = base.join("nonexistent.d/");

        std::fs::create_dir(&extra_dir).unwrap();
        std::fs::write(&top_file, EX_TOML).unwrap();
        std::fs::write(extra_dir.join("2.toml"), EX2_TOML).unwrap();
        // Not a `.toml` file, so it must be ignored rather than parsed.
        std::fs::write(extra_dir.join("not-toml"), "SYNTAX ERROR").unwrap();

        let wanted = [
            (top_file, MustRead::MustRead),
            (extra_dir, MustRead::MustRead),
            (missing_dir.clone(), MustRead::TolerateAbsence),
        ];
        let sources = sources_nodefaults(&wanted, &[]);
        let found = sources.scan().unwrap();

        let relative_names = found
            .iter()
            .map(|src| {
                src.as_path()
                    .unwrap()
                    .strip_prefix(&tmp)
                    .unwrap()
                    .to_str()
                    .unwrap()
            })
            .collect_vec();
        assert_eq!(relative_names, &["1.toml", "extra.d", "extra.d/2.toml"]);

        let cfg = found.load().unwrap();

        assert_eq!(cfg.get_string("hello.friends").unwrap(), "4242");
        assert_eq!(cfg.get_string("hello.world").unwrap(), "nonsense");

        // The same missing directory is an error once it is mandatory.
        let err = load_nodefaults(&[(missing_dir, MustRead::MustRead)], &[])
            .unwrap_err()
            .to_string();
        assert!(dbg!(err).contains("nonexistent.d"));
    }

    #[test]
    fn load_two_files_with_cmdline() {
        let tmp = tempdir().unwrap();
        let first = tmp.path().join("a_file");
        let second = tmp.path().join("other_file");
        std::fs::write(&first, EX_TOML).unwrap();
        std::fs::write(&second, EX2_TOML).unwrap();

        let files = [
            (first, MustRead::TolerateAbsence),
            (second, MustRead::MustRead),
        ];
        let opts = ["other.var=present".to_string()];
        let cfg = load_nodefaults(&files, &opts).unwrap();

        assert_eq!(cfg.get_string("hello.friends").unwrap(), "4242");
        assert_eq!(cfg.get_string("hello.world").unwrap(), "nonsense");
        assert_eq!(cfg.get_string("other.var").unwrap(), "present");
    }

    #[test]
    fn from_cmdline() {
        // With files given on the command line, the default is not used.
        let sources = ConfigurationSources::from_cmdline(
            [ConfigurationSource::from_path("/etc/loid.toml")],
            ["/family/yor.toml", "/family/anya.toml"],
            ["decade=1960", "snack=peanuts"],
        );
        let paths = sources
            .files
            .iter()
            .map(|(src, _)| src.as_path().unwrap().to_str().unwrap())
            .collect::<Vec<_>>();
        assert_eq!(paths, vec!["/family/yor.toml", "/family/anya.toml"]);
        assert_eq!(sources.files[0].1, MustRead::MustRead);
        assert_eq!(
            &sources.options,
            &vec!["decade=1960".to_owned(), "snack=peanuts".to_owned()]
        );

        // With no files given, only the default is used, and it is optional.
        let sources = ConfigurationSources::from_cmdline(
            [ConfigurationSource::from_path("/etc/loid.toml")],
            Vec::<PathBuf>::new(),
            ["decade=1960", "snack=peanuts"],
        );
        assert_eq!(
            &sources.files,
            &vec![(
                ConfigurationSource::from_path("/etc/loid.toml"),
                MustRead::TolerateAbsence
            )]
        );
    }

    #[test]
    fn dir_syntax() {
        let chk = |tf, s: &str| assert_eq!(tf, is_syntactically_directory(s.as_ref()), "{:?}", s);

        for not_dir in ["", "1", "1/2", "/1", "/1/2"] {
            chk(false, not_dir);
        }

        for dir in ["/", ".", "./", "..", "../", "1/", "1/2/", "/1/", "/1/2/"] {
            chk(true, dir);
        }
    }
}
645}