//! `ConfigurationSources`: Helper for handling configuration files
//!
//! This module provides [`ConfigurationSources`].
//!
//! This layer brings together the functionality of
//! our underlying configuration library,
//! [`fs_mistrust`] and [`tor_config::cmdline`](crate::cmdline).
//!
//! A `ConfigurationSources` records a set of filenames of TOML files,
//! ancillary instructions for reading them,
//! and also a set of command line options.
//!
//! Usually, call [`ConfigurationSources::from_cmdline`],
//! perhaps [`set_mistrust`](ConfigurationSources::set_mistrust),
//! and finally [`load`](ConfigurationSources::load).
//! The resulting [`ConfigurationTree`] can then be deserialized.
//!
//! If you want to watch for config file changes,
//! use [`ConfigurationSources::scan()`],
//! to obtain a [`FoundConfigFiles`],
//! start watching the paths returned by [`FoundConfigFiles::iter()`],
//! and then call [`FoundConfigFiles::load()`].
//! (This ordering starts watching the files before you read them,
//! which is necessary to avoid possibly missing changes.)
25

            
26
use std::ffi::OsString;
27
use std::{fs, io, sync::Arc};
28

            
29
use figment::Figment;
30
use void::ResultVoidExt as _;
31

            
32
use crate::err::ConfigError;
33
use crate::{CmdLine, ConfigurationTree};
34

            
35
use std::path::{Path, PathBuf};
36

            
37
/// A description of where to find our configuration options.
38
#[derive(Clone, Debug, Default)]
39
pub struct ConfigurationSources {
40
    /// List of files to read (in order).
41
    files: Vec<(ConfigurationSource, MustRead)>,
42
    /// A list of command-line options to apply after parsing the files.
43
    options: Vec<String>,
44
    /// We will check all files we read
45
    mistrust: fs_mistrust::Mistrust,
46
}
47

            
48
/// Rules for whether we should proceed if a configuration file is unreadable.
///
/// Some files (like the default configuration file) are okay to skip if they
/// aren't present. Others (like those specified on the command line) really
/// need to be there.
#[derive(Clone, Debug, Copy, Eq, PartialEq)]
#[allow(clippy::exhaustive_enums)]
pub enum MustRead {
    /// This file is okay to skip if it isn't present.
    TolerateAbsence,

    /// This file must be present and readable.
    MustRead,
}
62

            
63
/// A configuration file or directory, for use by a `ConfigurationSources`
///
/// You can make one out of a `PathBuf`, examining its syntax like `arti` does,
/// using `ConfigurationSource::from_path`.
#[derive(Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[allow(clippy::exhaustive_enums)]
pub enum ConfigurationSource {
    /// A plain file
    File(PathBuf),

    /// A directory
    Dir(PathBuf),

    /// Verbatim TOML text, used as if it had been read from a file
    Verbatim(Arc<String>),
}
79

            
80
impl ConfigurationSource {
81
    /// Interpret a path (or string) as a configuration file or directory spec
82
    ///
83
    /// If the path syntactically specifies a directory
84
    /// (i.e., can be seen to be a directory without accessing the filesystem,
85
    /// for example because it ends in a directory separator such as `/`)
86
    /// it is treated as specifying a directory.
87
620
    pub fn from_path<P: Into<PathBuf>>(p: P) -> ConfigurationSource {
88
        use ConfigurationSource as CS;
89
620
        let p = p.into();
90
620
        if is_syntactically_directory(&p) {
91
232
            CS::Dir(p)
92
        } else {
93
388
            CS::File(p)
94
        }
95
620
    }
96

            
97
    /// Use the provided text as verbatim TOML, as if it had been read from disk.
98
334
    pub fn from_verbatim(text: String) -> ConfigurationSource {
99
334
        Self::Verbatim(Arc::new(text))
100
334
    }
101

            
102
    /// Return a reference to the inner `Path`, if there is one.
103
70
    pub fn as_path(&self) -> Option<&Path> {
104
        use ConfigurationSource as CS;
105
70
        match self {
106
70
            CS::File(p) | CS::Dir(p) => Some(p),
107
            CS::Verbatim(_) => None,
108
        }
109
70
    }
110
}
111

            
112
/// Configuration files and directories we found in the filesystem
113
///
114
/// Result of [`ConfigurationSources::scan`].
115
///
116
/// When loading configuration files and also watching for filesystem updates,
117
/// this type encapsulates all the actual filesystem objects that need watching.
118
#[derive(Debug)]
119
pub struct FoundConfigFiles<'srcs> {
120
    /// The things we found
121
    ///
122
    /// This includes both:
123
    ///  * Files which ought to be read
124
    ///  * Directories, which may or may not contain any currently-relevant files
125
    ///
126
    /// The directories are retained for the purpose of watching for config changes:
127
    /// we will want to detect files being created within them,
128
    /// so our caller needs to discover them (via [`FoundConfigFiles::iter()`]).
129
    files: Vec<FoundConfigFile>,
130

            
131
    /// Our parent, which contains details we need for `load`
132
    sources: &'srcs ConfigurationSources,
133
}
134

            
135
/// A configuration source file or directory, found or not found on the filesystem
136
#[derive(Debug, Clone)]
137
struct FoundConfigFile {
138
    /// The path of the (putative) object
139
    source: ConfigurationSource,
140

            
141
    /// Were we expecting this to definitely exist
142
    must_read: MustRead,
143
}
144

            
145
impl ConfigurationSources {
146
    /// Create a new empty [`ConfigurationSources`].
147
548
    pub fn new_empty() -> Self {
148
548
        Self::default()
149
548
    }
150

            
151
    /// Establish a [`ConfigurationSources`] the from an infallible command line and defaults
152
    ///
153
    /// Convenience method for if the default config file location(s) can be infallibly computed.
154
4
    pub fn from_cmdline<F, O>(
155
4
        default_config_files: impl IntoIterator<Item = ConfigurationSource>,
156
4
        config_files_options: impl IntoIterator<Item = F>,
157
4
        cmdline_toml_override_options: impl IntoIterator<Item = O>,
158
4
    ) -> Self
159
4
    where
160
4
        F: Into<PathBuf>,
161
4
        O: Into<String>,
162
4
    {
163
4
        ConfigurationSources::try_from_cmdline(
164
4
            || Ok(default_config_files),
165
4
            config_files_options,
166
4
            cmdline_toml_override_options,
167
4
        )
168
4
        .void_unwrap()
169
4
    }
170

            
171
    /// Establish a [`ConfigurationSources`] the usual way from a command line and defaults
172
    ///
173
    /// The caller should have parsed the program's command line, and extracted (inter alia)
174
    ///
175
    ///  * `config_files_options`: Paths of config file(s) (or directories of `.toml` files)
176
    ///  * `cmdline_toml_override_options`: Overrides ("key=value")
177
    ///
178
    /// The caller should also provide `default_config_files`,
179
    /// which returns the default locations of the configuration files.
180
    /// This used if no file(s) are specified on the command line.
181
    //
182
    // The other inputs are always used and therefore
183
    // don't need to be lifted into FnOnce() -> Result.
184
    ///
185
    /// `mistrust` is used to check whether the configuration files have appropriate permissions.
186
    ///
187
    /// `ConfigurationSource::Dir`s
188
    /// will be scanned for files whose name ends in `.toml`.
189
    /// All those files (if any) will be read (in lexical order by filename).
190
34
    pub fn try_from_cmdline<F, O, DEF, E>(
191
34
        default_config_files: impl FnOnce() -> Result<DEF, E>,
192
34
        config_files_options: impl IntoIterator<Item = F>,
193
34
        cmdline_toml_override_options: impl IntoIterator<Item = O>,
194
34
    ) -> Result<Self, E>
195
34
    where
196
34
        F: Into<PathBuf>,
197
34
        O: Into<String>,
198
34
        DEF: IntoIterator<Item = ConfigurationSource>,
199
34
    {
200
34
        let mut cfg_sources = ConfigurationSources::new_empty();
201
34

            
202
34
        let mut any_files = false;
203
68
        for f in config_files_options {
204
34
            let f = f.into();
205
34
            cfg_sources.push_source(ConfigurationSource::from_path(f), MustRead::MustRead);
206
34
            any_files = true;
207
34
        }
208
34
        if !any_files {
209
2
            for default in default_config_files()? {
210
2
                cfg_sources.push_source(default, MustRead::TolerateAbsence);
211
2
            }
212
32
        }
213

            
214
42
        for s in cmdline_toml_override_options {
215
8
            cfg_sources.push_option(s);
216
8
        }
217

            
218
34
        Ok(cfg_sources)
219
34
    }
220

            
221
    /// Add `src` to the list of files or directories that we want to read configuration from.
222
    ///
223
    /// Configuration files are loaded and applied in the order that they are
224
    /// added to this object.
225
    ///
226
    /// If the listed file is absent, loading the configuration won't succeed.
227
520
    pub fn push_source(&mut self, src: ConfigurationSource, must_read: MustRead) {
228
520
        self.files.push((src, must_read));
229
520
    }
230

            
231
    /// Add `s` to the list of overridden options to apply to our configuration.
232
    ///
233
    /// Options are applied after all configuration files are loaded, in the
234
    /// order that they are added to this object.
235
    ///
236
    /// The format for `s` is as in [`CmdLine`].
237
8
    pub fn push_option(&mut self, option: impl Into<String>) {
238
8
        self.options.push(option.into());
239
8
    }
240

            
241
    /// Sets the filesystem permission mistrust
242
150
    pub fn set_mistrust(&mut self, mistrust: fs_mistrust::Mistrust) {
243
150
        self.mistrust = mistrust;
244
150
    }
245

            
246
    /// Reads the filesystem permission mistrust
247
    pub fn mistrust(&self) -> &fs_mistrust::Mistrust {
248
        &self.mistrust
249
    }
250

            
251
    /// Scan for files and load the configuration into a new [`ConfigurationTree`].
252
    ///
253
    /// This is a convenience method for [`scan()`](Self::scan)
254
    /// followed by [`files.load`].
255
522
    pub fn load(&self) -> Result<ConfigurationTree, ConfigError> {
256
522
        let files = self.scan()?;
257
520
        files.load()
258
522
    }
259

            
260
    /// Scan for configuration source files (including scanning any directories)
261
704
    pub fn scan(&self) -> Result<FoundConfigFiles, ConfigError> {
262
704
        let mut out = vec![];
263

            
264
1384
        for &(ref source, must_read) in &self.files {
265
682
            let required = must_read == MustRead::MustRead;
266
682

            
267
682
            // Returns Err(error) if we should bail,
268
682
            // or Ok(()) if we should ignore the error and skip the file.
269
684
            let handle_io_error = |e: io::Error, p: &Path| {
270
4
                if e.kind() == io::ErrorKind::NotFound && !required {
271
2
                    Result::<_, crate::ConfigError>::Ok(())
272
                } else {
273
2
                    Err(crate::ConfigError::Io {
274
2
                        action: "reading",
275
2
                        path: p.to_owned(),
276
2
                        err: Arc::new(e),
277
2
                    })
278
                }
279
704
            };
280
702

            
281
702
            use ConfigurationSource as CS;
282
682
            match &source {
283
186
                CS::Dir(dirname) => {
284
186
                    let dir = match fs::read_dir(dirname) {
285
182
                        Ok(y) => y,
286
4
                        Err(e) => {
287
4
                            handle_io_error(e, dirname.as_ref())?;
288
2
                            continue;
289
                        }
290
                    };
291
182
                    out.push(FoundConfigFile {
292
182
                        source: source.clone(),
293
182
                        must_read,
294
182
                    });
295
182
                    // Rebinding `found` avoids using the directory name by mistake.
296
182
                    let mut entries = vec![];
297
486
                    for found in dir {
298
                        // reuse map_io_err, which embeds the directory name,
299
                        // since if we have Err we don't have an entry name.
300
304
                        let found = match found {
301
304
                            Ok(y) => y,
302
                            Err(e) => {
303
                                handle_io_error(e, dirname.as_ref())?;
304
                                continue;
305
                            }
306
                        };
307
304
                        let leaf = found.file_name();
308
304
                        let leaf: &Path = leaf.as_ref();
309
304
                        match leaf.extension() {
310
302
                            Some(e) if e == "toml" => {}
311
2
                            _ => continue,
312
                        }
313
302
                        entries.push(found.path());
314
                    }
315
182
                    entries.sort();
316
689
                    out.extend(entries.into_iter().map(|path| FoundConfigFile {
317
682
                        source: CS::File(path),
318
682
                        must_read: MustRead::TolerateAbsence,
319
689
                    }));
320
182
                }
321
496
                CS::File(_) | CS::Verbatim(_) => {
322
496
                    out.push(FoundConfigFile {
323
496
                        source: source.clone(),
324
496
                        must_read,
325
496
                    });
326
496
                }
327
            }
328
        }
329

            
330
702
        Ok(FoundConfigFiles {
331
702
            files: out,
332
702
            sources: self,
333
702
        })
334
704
    }
335
}
336

            
337
impl FoundConfigFiles<'_> {
338
    /// Iterate over the filesystem objects that the scan found
339
    //
340
    // This ought really to be `impl IntoIterator for &Self` but that's awkward without TAIT
341
182
    pub fn iter(&self) -> impl Iterator<Item = &ConfigurationSource> {
342
213
        self.files.iter().map(|f| &f.source)
343
182
    }
344

            
345
    /// Add every file and commandline source to `builder`, returning a new
346
    /// builder.
347
672
    fn add_sources(self, mut builder: Figment) -> Result<Figment, ConfigError> {
348
        use figment::providers::Format;
349

            
350
        // Note that we're using `merge` here.  It causes later sources' options
351
        // to replace those in earlier sources, and causes arrays to be replaced
352
        // rather than extended.
353
        //
354
        // TODO #1337: This array behavior is not necessarily ideal for all
355
        // cases, but doing something smarter would probably require us to hack
356
        // figment-rs or toml.
357

            
358
1592
        for FoundConfigFile { source, must_read } in self.files {
359
430
            use ConfigurationSource as CS;
360

            
361
920
            let required = must_read == MustRead::MustRead;
362

            
363
920
            let file = match source {
364
434
                CS::File(file) => file,
365
152
                CS::Dir(_) => continue,
366
334
                CS::Verbatim(text) => {
367
334
                    builder = builder.merge(figment::providers::Toml::string(&text));
368
334
                    continue;
369
                }
370
            };
371

            
372
434
            match self
373
434
                .sources
374
434
                .mistrust
375
434
                .verifier()
376
434
                .permit_readable()
377
434
                .check(&file)
378
            {
379
430
                Ok(()) => {}
380
4
                Err(fs_mistrust::Error::NotFound(_)) if !required => {
381
4
                    continue;
382
                }
383
                Err(e) => return Err(ConfigError::FileAccess(e)),
384
            }
385

            
386
            // We use file_exact here so that figment won't look in parent
387
            // directories if the target file can't be found.
388
430
            let f = figment::providers::Toml::file_exact(file);
389
430
            builder = builder.merge(f);
390
        }
391

            
392
672
        let mut cmdline = CmdLine::new();
393
674
        for opt in &self.sources.options {
394
2
            cmdline.push_toml_line(opt.clone());
395
2
        }
396
672
        builder = builder.merge(cmdline);
397
672

            
398
672
        Ok(builder)
399
672
    }
400

            
401
    /// Load the configuration into a new [`ConfigurationTree`].
402
672
    pub fn load(self) -> Result<ConfigurationTree, ConfigError> {
403
672
        let mut builder = Figment::new();
404
672
        builder = self.add_sources(builder)?;
405

            
406
672
        Ok(ConfigurationTree(builder))
407
672
    }
408
}
409

            
410
/// Does it end in a slash?  (Or some other way of saying this is a directory.)
///
/// This looks only at the syntax of `p`; the filesystem is not accessed.
///
/// Returns `false` for the empty path,
/// and `true` if the last component of `p` is a prefix, the root, `.` or `..`.
/// Otherwise returns `true` iff `p` ends in a path separator.
fn is_syntactically_directory(p: &Path) -> bool {
    use std::path::Component as PC;

    match p.components().next_back() {
        None => false,
        Some(PC::Prefix(_) | PC::RootDir | PC::CurDir | PC::ParentDir) => true,
        Some(PC::Normal(_)) => {
            // Does it end in a slash?
            let n_components = p.components().count();

            // stdlib doesn't let us tell if the thing ends in a path separator.
            // components() normalises, so doesn't give us an empty component
            // But, if it ends in a path separator, adding a path component char will
            // mean adding a component.
            // This will work regardless of the path separator, on any platform where
            // paths naming directories are like those for files.
            // It would even work on some others, eg VMS.
            let mut appended = OsString::from(p);
            appended.push("a");
            let n_appended = PathBuf::from(appended).components().count();
            n_appended != n_components
        }
    }
}
435

            
436
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::Itertools;
    use tempfile::tempdir;

    /// Example configuration setting `hello.world` and `hello.friends`
    static EX_TOML: &str = "
[hello]
world = \"stuff\"
friends = 4242
";

    /// Make a ConfigurationSources (that doesn't include the arti defaults)
    fn sources_nodefaults<P: AsRef<Path>>(
        files: &[(P, MustRead)],
        opts: &[String],
    ) -> ConfigurationSources {
        let mistrust = fs_mistrust::Mistrust::new_dangerously_trust_everyone();
        let files = files
            .iter()
            .map(|(p, m)| (ConfigurationSource::from_path(p.as_ref()), *m))
            .collect_vec();
        let options = opts.iter().cloned().collect_vec();
        ConfigurationSources {
            files,
            options,
            mistrust,
        }
    }

    /// Load from a set of files and option strings, without taking
    /// the arti defaults into account.
    fn load_nodefaults<P: AsRef<Path>>(
        files: &[(P, MustRead)],
        opts: &[String],
    ) -> Result<ConfigurationTree, crate::ConfigError> {
        sources_nodefaults(files, opts).load()
    }

    #[test]
    fn non_required_file() {
        let td = tempdir().unwrap();
        let dflt = td.path().join("a_file");
        let files = vec![(dflt, MustRead::TolerateAbsence)];
        load_nodefaults(&files, Default::default()).unwrap();
    }

    /// Example configuration overriding `hello.world` only
    static EX2_TOML: &str = "
[hello]
world = \"nonsense\"
";

    #[test]
    fn both_required_and_not() {
        let td = tempdir().unwrap();
        let dflt = td.path().join("a_file");
        let cf = td.path().join("other_file");
        std::fs::write(&cf, EX2_TOML).unwrap();
        let files = vec![(dflt, MustRead::TolerateAbsence), (cf, MustRead::MustRead)];
        let c = load_nodefaults(&files, Default::default()).unwrap();

        assert!(c.get_string("hello.friends").is_err());
        assert_eq!(c.get_string("hello.world").unwrap(), "nonsense");
    }

    #[test]
    fn dir_with_some() {
        let td = tempdir().unwrap();
        let cf = td.path().join("1.toml");
        let d = td.path().join("extra.d/");
        let df = d.join("2.toml");
        let xd = td.path().join("nonexistent.d/");
        std::fs::create_dir(&d).unwrap();
        std::fs::write(&cf, EX_TOML).unwrap();
        std::fs::write(df, EX2_TOML).unwrap();
        std::fs::write(d.join("not-toml"), "SYNTAX ERROR").unwrap();

        let files = vec![
            (cf, MustRead::MustRead),
            (d, MustRead::MustRead),
            (xd.clone(), MustRead::TolerateAbsence),
        ];
        let c = sources_nodefaults(&files, Default::default());
        let found = c.scan().unwrap();

        assert_eq!(
            found
                .iter()
                .map(|p| p
                    .as_path()
                    .unwrap()
                    .strip_prefix(&td)
                    .unwrap()
                    .to_str()
                    .unwrap())
                .collect_vec(),
            &["1.toml", "extra.d", "extra.d/2.toml"]
        );

        let c = found.load().unwrap();

        assert_eq!(c.get_string("hello.friends").unwrap(), "4242");
        assert_eq!(c.get_string("hello.world").unwrap(), "nonsense");

        let files = vec![(xd, MustRead::MustRead)];
        let e = load_nodefaults(&files, Default::default())
            .unwrap_err()
            .to_string();
        assert!(dbg!(e).contains("nonexistent.d"));
    }

    #[test]
    fn load_two_files_with_cmdline() {
        let td = tempdir().unwrap();
        let cf1 = td.path().join("a_file");
        let cf2 = td.path().join("other_file");
        std::fs::write(&cf1, EX_TOML).unwrap();
        std::fs::write(&cf2, EX2_TOML).unwrap();
        let v = vec![(cf1, MustRead::TolerateAbsence), (cf2, MustRead::MustRead)];
        let v2 = vec!["other.var=present".to_string()];
        let c = load_nodefaults(&v, &v2).unwrap();

        assert_eq!(c.get_string("hello.friends").unwrap(), "4242");
        assert_eq!(c.get_string("hello.world").unwrap(), "nonsense");
        assert_eq!(c.get_string("other.var").unwrap(), "present");
    }

    #[test]
    fn from_cmdline() {
        // Try one with specified files
        let sources = ConfigurationSources::from_cmdline(
            [ConfigurationSource::from_path("/etc/loid.toml")],
            ["/family/yor.toml", "/family/anya.toml"],
            ["decade=1960", "snack=peanuts"],
        );
        let files: Vec<_> = sources
            .files
            .iter()
            .map(|file| file.0.as_path().unwrap().to_str().unwrap())
            .collect();
        assert_eq!(files, vec!["/family/yor.toml", "/family/anya.toml"]);
        assert_eq!(sources.files[0].1, MustRead::MustRead);
        assert_eq!(
            &sources.options,
            &vec!["decade=1960".to_owned(), "snack=peanuts".to_owned()]
        );

        // Try once with default only.
        let sources = ConfigurationSources::from_cmdline(
            [ConfigurationSource::from_path("/etc/loid.toml")],
            Vec::<PathBuf>::new(),
            ["decade=1960", "snack=peanuts"],
        );
        assert_eq!(
            &sources.files,
            &vec![(
                ConfigurationSource::from_path("/etc/loid.toml"),
                MustRead::TolerateAbsence
            )]
        );
    }

    #[test]
    fn dir_syntax() {
        let chk = |tf, s: &str| assert_eq!(tf, is_syntactically_directory(s.as_ref()), "{:?}", s);

        chk(false, "");
        chk(false, "1");
        chk(false, "1/2");
        chk(false, "/1");
        chk(false, "/1/2");

        chk(true, "/");
        chk(true, ".");
        chk(true, "./");
        chk(true, "..");
        chk(true, "../");
        chk(true, "/");
        chk(true, "1/");
        chk(true, "1/2/");
        chk(true, "/1/");
        chk(true, "/1/2/");
    }
}