tor_dirmgr/
storage.rs

1//! Methods for storing and loading directory information from disk.
2//!
3//! We have code implemented for a flexible storage format based on sqlite.
4
5// (There was once a read-only format based on the C tor implementation's
6// storage: Search the git history for tor-dirmgr/src/storage/legacy.rs
7// if you ever need to reinstate it.)
8
9use tor_netdoc::doc::authcert::AuthCertKeyIds;
10use tor_netdoc::doc::microdesc::MdDigest;
11use tor_netdoc::doc::netstatus::{ConsensusFlavor, ProtoStatuses};
12
13#[cfg(feature = "routerdesc")]
14use tor_netdoc::doc::routerdesc::RdDigest;
15
16#[cfg(feature = "bridge-client")]
17pub(crate) use tor_guardmgr::bridge::BridgeConfig;
18
19use crate::docmeta::{AuthCertMeta, ConsensusMeta};
20use crate::{Error, Result};
21use std::cell::RefCell;
22use std::collections::HashMap;
23use std::fs::File;
24use std::io::Result as IoResult;
25use std::str::Utf8Error;
26use std::time::SystemTime;
27use time::Duration;
28
29pub(crate) mod sqlite;
30
31pub(crate) use sqlite::SqliteStore;
32
/// Convenient Sized & dynamic [`Store`]
///
/// This is a boxed trait object, for code that needs to hold a [`Store`]
/// without naming the concrete implementation type.
pub(crate) type DynStore = Box<dyn Store>;
35
/// A document returned by a directory manager.
///
/// This document may be in memory, or may be mapped from a cache.  It is
/// not necessarily valid UTF-8.
///
/// The raw bytes are always available through the `AsRef<[u8]>`
/// implementation on this type.
pub struct DocumentText {
    /// The underlying InputString.  We only wrap this type to make it
    /// opaque to other crates, so they don't have to worry about the
    /// implementation details.
    s: InputString,
}
46
47impl From<InputString> for DocumentText {
48    fn from(s: InputString) -> DocumentText {
49        DocumentText { s }
50    }
51}
52
53impl AsRef<[u8]> for DocumentText {
54    fn as_ref(&self) -> &[u8] {
55        self.s.as_ref()
56    }
57}
58
59impl DocumentText {
60    /// Try to return a view of this document as a string.
61    pub(crate) fn as_str(&self) -> std::result::Result<&str, Utf8Error> {
62        self.s.as_str_impl()
63    }
64
65    /// Create a new DocumentText holding the provided string.
66    pub(crate) fn from_string(s: String) -> Self {
67        DocumentText {
68            s: InputString::Utf8(s),
69        }
70    }
71}
72
/// An abstraction over a possible string that we've loaded or mapped from
/// a cache.
///
/// The contents are not guaranteed to be UTF-8 unless this is the `Utf8`
/// variant; use `as_str()` to get a checked `&str` view, or the
/// `AsRef<[u8]>` implementation to get the raw bytes.
#[derive(Debug)]
pub(crate) enum InputString {
    /// A string that's been validated as UTF-8
    Utf8(String),
    /// A set of unvalidated bytes.
    UncheckedBytes {
        /// The underlying bytes
        bytes: Vec<u8>,
        /// Whether the bytes have been validated previously as UTF-8
        ///
        /// This is a `RefCell` so that a successful check can be recorded
        /// from `as_str()`, which only has `&self`.
        validated: RefCell<bool>,
    },
    #[cfg(feature = "mmap")]
    /// A set of memory-mapped bytes (not yet validated as UTF-8).
    MappedBytes {
        /// The underlying bytes
        bytes: memmap2::Mmap,
        /// Whether the bytes have been validated previously as UTF-8
        ///
        /// This is a `RefCell` so that a successful check can be recorded
        /// from `as_str()`, which only has `&self`.
        validated: RefCell<bool>,
    },
}
95
96impl InputString {
97    /// Return a view of this InputString as a &str, if it is valid UTF-8.
98    pub(crate) fn as_str(&self) -> Result<&str> {
99        self.as_str_impl().map_err(Error::BadUtf8InCache)
100    }
101
102    /// Helper for [`Self::as_str()`], with unwrapped error type.
103    fn as_str_impl(&self) -> std::result::Result<&str, Utf8Error> {
104        // It is not necessary to re-check the UTF8 every time
105        // this function is called so remember the result
106        // we got with `validated`
107
108        match self {
109            InputString::Utf8(s) => Ok(&s[..]),
110            InputString::UncheckedBytes { bytes, validated } => {
111                if *validated.borrow() {
112                    unsafe { Ok(std::str::from_utf8_unchecked(&bytes[..])) }
113                } else {
114                    let result = std::str::from_utf8(&bytes[..])?;
115                    validated.replace(true);
116                    Ok(result)
117                }
118            }
119            #[cfg(feature = "mmap")]
120            InputString::MappedBytes { bytes, validated } => {
121                if *validated.borrow() {
122                    unsafe { Ok(std::str::from_utf8_unchecked(&bytes[..])) }
123                } else {
124                    let result = std::str::from_utf8(&bytes[..])?;
125                    validated.replace(true);
126                    Ok(result)
127                }
128            }
129        }
130    }
131    /// Try to create an [`InputString`] from an open [`File`].
132    ///
133    /// We'll try to memory-map the file if we can.  If that fails, or if we
134    /// were built without the `mmap` feature, we'll fall back to reading the
135    /// file into memory.
136    pub(crate) fn load(file: File) -> IoResult<Self> {
137        #[cfg(feature = "mmap")]
138        {
139            let mapping = unsafe {
140                // I'd rather have a safe option, but that's not possible
141                // with mmap, since other processes could in theory replace
142                // the contents of the file while we're using it.
143                memmap2::Mmap::map(&file)
144            };
145            if let Ok(bytes) = mapping {
146                return Ok(InputString::MappedBytes {
147                    bytes,
148                    validated: RefCell::new(false),
149                });
150            }
151        }
152        use std::io::{BufReader, Read};
153        let mut f = BufReader::new(file);
154        let mut result = String::new();
155        f.read_to_string(&mut result)?;
156        Ok(InputString::Utf8(result))
157    }
158}
159
160impl AsRef<[u8]> for InputString {
161    fn as_ref(&self) -> &[u8] {
162        match self {
163            InputString::Utf8(s) => s.as_ref(),
164            InputString::UncheckedBytes { bytes, .. } => &bytes[..],
165            #[cfg(feature = "mmap")]
166            InputString::MappedBytes { bytes, .. } => &bytes[..],
167        }
168    }
169}
170
171impl From<String> for InputString {
172    fn from(s: String) -> InputString {
173        InputString::Utf8(s)
174    }
175}
176
177impl From<Vec<u8>> for InputString {
178    fn from(bytes: Vec<u8>) -> InputString {
179        InputString::UncheckedBytes {
180            bytes,
181            validated: RefCell::new(false),
182        }
183    }
184}
185
/// Configuration of expiration of each element of a [`Store`].
///
/// Each field is a retention period for one kind of stored object; see
/// `EXPIRATION_DEFAULTS` for the values used by default.
pub(crate) struct ExpirationConfig {
    /// How long to keep router descriptors.
    ///
    /// This timeout is measured since the publication date of the router
    /// descriptor.
    ///
    /// TODO(nickm): We may want a better approach in the future; see notes in
    /// `EXPIRATION_DEFAULTS`.
    pub(super) router_descs: Duration,
    /// How long to keep unlisted microdescriptors.
    ///
    /// This timeout counts the amount of time since a microdescriptor is no
    /// longer listed in a live consensus. Shorter values save storage at the
    /// expense of extra bandwidth spent re-downloading microdescriptors; higher
    /// values save bandwidth at the expense of storage used to store old
    /// microdescriptors that might become listed again.
    pub(super) microdescs: Duration,
    /// How long to keep expired authority certificates.
    pub(super) authcerts: Duration,
    /// How long to keep expired consensuses.
    pub(super) consensuses: Duration,
}
209
210/// Configuration of expiration shared between [`Store`] implementations.
211pub(crate) const EXPIRATION_DEFAULTS: ExpirationConfig = {
212    ExpirationConfig {
213        // TODO: This is the value that C Tor uses here, but it may be desirable
214        // to adjust it depending on what we find in practice.  For relays,
215        // instead of looking at publication date, we might want to use an
216        // approach more similar to the "last-listed" approach taken by
217        // microdescriptors.  For bridges, we can keep descriptors for a longer
218        // time.  In either case, we may be able to discard all but the most
219        // recent descriptor from each identity.
220        router_descs: Duration::days(5),
221        // This value is a compromise between saving bandwidth (by not having to
222        // re-download microdescs) and saving space (by not having to store too
223        // many microdescs).  It's the same one that C tor uses; experiments on
224        // 2022 data suggest that it winds up using only 1% more microdesc dl
225        // bandwidth than strictly necessary, at the cost of storing 40% more
226        // microdescriptors than will be immediately useful at any given time.
227        microdescs: Duration::days(7),
228        authcerts: Duration::ZERO,
229        consensuses: Duration::days(2),
230    }
231};
232
/// Representation of a storage.
///
/// When creating an instance of this [`Store`], it should try to grab the lock during
/// initialization (`is_readonly() iff some other implementation grabbed it`).
pub(crate) trait Store: Send + 'static {
    /// Return true if this [`Store`] is opened in read-only mode.
    fn is_readonly(&self) -> bool;
    /// Try to upgrade from a read-only connection to a read-write connection.
    ///
    /// Return true on success; false if another process had the lock.
    fn upgrade_to_readwrite(&mut self) -> Result<bool>;

    /// Delete all completely-expired objects from the database.
    ///
    /// This is pretty conservative, and only removes things that are
    /// definitely past their expiration date, as determined by the
    /// retention periods in `expiration`.
    fn expire_all(&mut self, expiration: &ExpirationConfig) -> Result<()>;

    /// Load the latest consensus from disk.
    ///
    /// If `pending` is given, we will only return a consensus with
    /// the given "pending" status.  (A pending consensus doesn't have
    /// enough descriptors yet.)  If `pending` is None, we'll
    /// return a consensus with any pending status.
    fn latest_consensus(
        &self,
        flavor: ConsensusFlavor,
        pending: Option<bool>,
    ) -> Result<Option<InputString>>;
    /// Return the information about the latest non-pending consensus,
    /// including its valid-after time and digest.
    fn latest_consensus_meta(&self, flavor: ConsensusFlavor) -> Result<Option<ConsensusMeta>>;
    /// Try to read the consensus corresponding to the provided metadata object.
    ///
    /// Only available in test builds.
    #[cfg(test)]
    fn consensus_by_meta(&self, cmeta: &ConsensusMeta) -> Result<InputString>;
    /// Try to read the consensus whose signed part has the provided
    /// SHA3-256 digest `d`, along with its metadata.
    fn consensus_by_sha3_digest_of_signed_part(
        &self,
        d: &[u8; 32],
    ) -> Result<Option<(InputString, ConsensusMeta)>>;
    /// Write a consensus to disk.
    ///
    /// `contents` is the text of the consensus, `cmeta` its metadata, and
    /// `pending` the "pending" status to record for it.
    fn store_consensus(
        &mut self,
        cmeta: &ConsensusMeta,
        flavor: ConsensusFlavor,
        pending: bool,
        contents: &str,
    ) -> Result<()>;
    /// Mark the consensus generated from `cmeta` as no longer pending.
    fn mark_consensus_usable(&mut self, cmeta: &ConsensusMeta) -> Result<()>;
    /// Remove the consensus generated from `cmeta`.
    //
    // Nothing uses this yet; removal is handled from `expire_all`.
    #[allow(dead_code)] // see also allow on REMOVE_CONSENSUS
    fn delete_consensus(&mut self, cmeta: &ConsensusMeta) -> Result<()>;

    /// Read all of the specified authority certs from the cache.
    ///
    /// The result maps each key-ID pair to the text of its certificate.
    fn authcerts(&self, certs: &[AuthCertKeyIds]) -> Result<HashMap<AuthCertKeyIds, String>>;
    /// Save a list of authority certificates to the cache.
    ///
    /// Each element of `certs` pairs a certificate's metadata with its text.
    fn store_authcerts(&mut self, certs: &[(AuthCertMeta, &str)]) -> Result<()>;

    /// Read all the microdescriptors listed in `digests` from the cache.
    fn microdescs(&self, digests: &[MdDigest]) -> Result<HashMap<MdDigest, String>>;
    /// Store every microdescriptor in `digests` into the cache, and say that
    /// it was last listed at `when`.
    ///
    /// Each element of `digests` pairs a microdescriptor's text with its digest.
    fn store_microdescs(&mut self, digests: &[(&str, &MdDigest)], when: SystemTime) -> Result<()>;
    /// Update the `last-listed` time of every microdescriptor in
    /// `digests` to `when` or later.
    fn update_microdescs_listed(&mut self, digests: &[MdDigest], when: SystemTime) -> Result<()>;

    /// Read all the router descriptors listed in `digests` from the cache.
    ///
    /// Only available when the `routerdesc` feature is present.
    #[cfg(feature = "routerdesc")]
    fn routerdescs(&self, digests: &[RdDigest]) -> Result<HashMap<RdDigest, String>>;
    /// Store every router descriptor in `digests` into the cache.
    ///
    /// Only available when the `routerdesc` feature is present.
    #[cfg(feature = "routerdesc")]
    #[allow(unused)]
    fn store_routerdescs(&mut self, digests: &[(&str, SystemTime, &RdDigest)]) -> Result<()>;

    /// Look up a cached bridge descriptor.
    ///
    /// Only available when the `bridge-client` feature is present.
    #[cfg(feature = "bridge-client")]
    fn lookup_bridgedesc(&self, bridge: &BridgeConfig) -> Result<Option<CachedBridgeDescriptor>>;

    /// Store a cached bridge descriptor.
    ///
    /// This entry will be deleted some time after `until`
    /// (but the caller is not allowed to rely on either timely deletion,
    /// or retention until that time).
    ///
    /// Only available when the `bridge-client` feature is present.
    #[cfg(feature = "bridge-client")]
    fn store_bridgedesc(
        &mut self,
        bridge: &BridgeConfig,
        entry: CachedBridgeDescriptor,
        until: SystemTime,
    ) -> Result<()>;

    /// Delete a cached bridge descriptor for this bridge.
    ///
    /// It's not an error if it's not present.
    ///
    /// Only available when the `bridge-client` feature is present.
    #[cfg(feature = "bridge-client")]
    // Nothing uses this yet; removal is handled from `expire_all`.
    #[allow(dead_code)] // see also allow on DELETE_BRIDGEDESC
    fn delete_bridgedesc(&mut self, bridge: &BridgeConfig) -> Result<()>;

    /// Try to update our cached protocol recommendations to those listed in `protocols`.
    ///
    /// `valid_after` is the time associated with these recommendations
    /// (presumably the valid-after time of the consensus they came from;
    /// confirm against callers).
    fn update_protocol_recommendations(
        &mut self,
        valid_after: SystemTime,
        protocols: &ProtoStatuses,
    ) -> Result<()>;

    /// Return our most recent cached protocol recommendations, along with
    /// the time stored with them, if we have any.
    fn cached_protocol_recommendations(&self) -> Result<Option<(SystemTime, ProtoStatuses)>>;
}
349
/// Value in the bridge descriptor cache
///
/// This is the type stored and returned by the bridge-descriptor methods
/// of [`Store`].  It is only used when the `bridge-client` feature is
/// enabled; the `allow(dead_code)` below covers builds without it.
#[derive(Clone, Debug)]
#[cfg_attr(not(feature = "bridge-client"), allow(dead_code))]
pub(crate) struct CachedBridgeDescriptor {
    /// When we fetched this
    pub(crate) fetched: SystemTime,

    /// The document text, as we fetched it
    pub(crate) document: String,
}
360
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::*;
    use tempfile::tempdir;

    /// `InputString` built from a `String` and from raw bytes.
    ///
    /// Every `as_str()` check is made twice: the first call performs the
    /// UTF-8 validation, and the second exercises the cached result.
    #[test]
    fn strings() {
        let s: InputString = "Hello world".to_string().into();
        assert_eq!(s.as_ref(), b"Hello world");
        assert_eq!(s.as_str().unwrap(), "Hello world");
        assert_eq!(s.as_str().unwrap(), "Hello world");

        let s: InputString = b"Hello world".to_vec().into();
        assert_eq!(s.as_ref(), b"Hello world");
        assert_eq!(s.as_str().unwrap(), "Hello world");
        assert_eq!(s.as_str().unwrap(), "Hello world");

        // bad utf-8
        let s: InputString = b"Hello \xff world".to_vec().into();
        assert_eq!(s.as_ref(), b"Hello \xff world");
        assert!(s.as_str().is_err());
        // A failed validation must never be remembered as a success.
        assert!(s.as_str().is_err());
        // The raw bytes stay available after a failed validation.
        assert_eq!(s.as_ref(), b"Hello \xff world");
    }

    /// `InputString::load` on a UTF-8 file and on a non-UTF-8 file.
    #[test]
    fn files() {
        let td = tempdir().unwrap();

        let goodstr = td.path().join("goodstr");
        std::fs::write(&goodstr, "This is a reasonable file.\n").unwrap();
        let s = InputString::load(File::open(goodstr).unwrap());
        let s = s.unwrap();
        assert_eq!(s.as_str().unwrap(), "This is a reasonable file.\n");
        assert_eq!(s.as_str().unwrap(), "This is a reasonable file.\n");
        assert_eq!(s.as_ref(), b"This is a reasonable file.\n");

        let badutf8 = td.path().join("badutf8");
        std::fs::write(&badutf8, b"Not good \xff UTF-8.\n").unwrap();
        // Depending on whether the file was mapped or read into memory,
        // the bad UTF-8 is reported either by `load` or by `as_str`.
        match InputString::load(File::open(badutf8).unwrap()) {
            Err(_) => {}
            Ok(s) => {
                assert!(s.as_str().is_err());
                // A failed validation must never be remembered as a success.
                assert!(s.as_str().is_err());
                assert_eq!(s.as_ref(), b"Not good \xff UTF-8.\n");
            }
        }
    }

    /// `DocumentText` passes through the behavior of the wrapped `InputString`.
    #[test]
    fn doctext() {
        let s: InputString = "Hello universe".to_string().into();
        let dt: DocumentText = s.into();
        assert_eq!(dt.as_ref(), b"Hello universe");
        assert_eq!(dt.as_str(), Ok("Hello universe"));
        assert_eq!(dt.as_str(), Ok("Hello universe"));

        let s: InputString = b"Hello \xff universe".to_vec().into();
        let dt: DocumentText = s.into();
        assert_eq!(dt.as_ref(), b"Hello \xff universe");
        assert!(dt.as_str().is_err());
        // A failed validation must never be remembered as a success.
        assert!(dt.as_str().is_err());

        let dt = DocumentText::from_string("Hello multiverse".to_string());
        assert_eq!(dt.as_ref(), b"Hello multiverse");
        assert_eq!(dt.as_str(), Ok("Hello multiverse"));
    }
}
428}