//! Based on a set of rules, validate a token stream and collect the
//! tokens by type.
//!
//! See the "rules" module for definitions of keyword types and
//! per-keyword rules.
//!
//! The key types in this module are SectionRules, which explains how to
//! validate and partition a stream of Item, and Section, which contains
//! a validated set of Item, ready to be interpreted.
//!
//! # Example
//!
//! (This is an internal API, so see the routerdesc.rs source for an
//! example of use.)
15

            
16
use crate::parse::keyword::Keyword;
17
use crate::parse::rules::*;
18
use crate::parse::tokenize::*;
19
use crate::{NetdocErrorKind as EK, Result};
20

            
21
use educe::Educe;
22

            
23
/// Describe the rules for one section of a document.
24
///
25
/// The rules are represented as a mapping from token index to
26
/// rules::TokenFmt.
27
#[derive(Clone)]
28
pub(crate) struct SectionRules<T: Keyword> {
29
    /// A set of rules for decoding a series of tokens into a Section
30
    /// object.  Each element of this array corresponds to the
31
    /// token with the corresponding index values.
32
    ///
33
    /// When an array element is None, the corresponding keyword is
34
    /// not allowed in this kind section.  Otherwise, the array
35
    /// element is a TokenFmt describing how many of the corresponding
36
    /// token may appear, and what they need to look like.
37
    rules: Vec<Option<TokenFmt<T>>>,
38
}
39

            
40
/// The entry or entries for a particular keyword within a document.
41
9173
#[derive(Clone, Educe)]
42
#[educe(Default)]
43
struct TokVal<'a, K: Keyword>(Vec<Item<'a, K>>);
44

            
45
impl<'a, K: Keyword> TokVal<'a, K> {
46
    /// Return the number of Items for this value.
47
9173
    fn none() -> Self {
48
9173
        Default::default()
49
9173
    }
50
    /// Return the number of Items for this value.
51
158696
    fn count(&self) -> usize {
52
158696
        self.0.len()
53
158696
    }
54
    /// Return the first Item for this value, or None if there wasn't one.
55
2812
    fn first(&self) -> Option<&Item<'a, K>> {
56
2812
        self.0.first()
57
2812
    }
58
    /// Return the Item for this value, if there is exactly one.
59
71637
    fn singleton(&self) -> Option<&Item<'a, K>> {
60
71637
        match &*self.0 {
61
60448
            [x] => Some(x),
62
11189
            _ => None,
63
        }
64
71637
    }
65
    /// Return all the Items for this value, as a slice.
66
318329
    fn as_slice(&self) -> &[Item<'a, K>] {
67
318329
        &self.0
68
318329
    }
69
    /// Return the last Item for this value, if any.
70
1077
    fn last(&self) -> Option<&Item<'a, K>> {
71
1077
        self.0.last()
72
1077
    }
73
}
74

            
75
/// A Section is the result of sorting a document's entries by keyword.
76
///
77
/// TODO: I'd rather have this be pub(crate), but I haven't figured out
78
/// how to make that work.
79
pub struct Section<'a, T: Keyword> {
80
    /// Map from Keyword index to TokVal
81
    v: Vec<TokVal<'a, T>>,
82
    /// The keyword that appeared first in this section.  This will
83
    /// be set if `v` is nonempty.
84
    first: Option<T>,
85
    /// The keyword that appeared last in this section.  This will
86
    /// be set if `v` is nonempty.
87
    last: Option<T>,
88
}
89

            
90
impl<'a, T: Keyword> Section<'a, T> {
91
    /// Make a new empty Section.
92
9173
    fn new() -> Self {
93
9173
        let n = T::n_vals();
94
9173
        let mut v = Vec::with_capacity(n);
95
9173
        v.resize(n, TokVal::none());
96
9173
        Section {
97
9173
            v,
98
9173
            first: None,
99
9173
            last: None,
100
9173
        }
101
9173
    }
102
    /// Helper: return the tokval for some Keyword.
103
111334
    fn tokval(&self, t: T) -> &TokVal<'a, T> {
104
111334
        let idx = t.idx();
105
111334
        &self.v[idx]
106
111334
    }
107
    /// Return all the Items for some Keyword, as a slice.
108
35808
    pub(crate) fn slice(&self, t: T) -> &[Item<'a, T>] {
109
35808
        self.tokval(t).as_slice()
110
35808
    }
111
    /// Return a single Item for some Keyword, if there is exactly one.
112
71637
    pub(crate) fn get(&self, t: T) -> Option<&Item<'a, T>> {
113
71637
        self.tokval(t).singleton()
114
71637
    }
115
    /// Return a single Item for some Keyword, giving an error if there
116
    /// is not exactly one.
117
    ///
118
    /// It is usually a mistake to use this function on a Keyword that is
119
    /// not required.
120
43755
    pub(crate) fn required(&self, t: T) -> Result<&Item<'a, T>> {
121
43755
        self.get(t)
122
43755
            .ok_or_else(|| EK::MissingToken.with_msg(t.to_str()))
123
43755
    }
124
    /// Return a proxy MaybeItem object for some keyword.
125
    //
126
    /// A MaybeItem is used to represent an object that might or might
127
    /// not be there.
128
10965
    pub(crate) fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> {
129
10965
        MaybeItem::from_option(self.get(t))
130
10965
    }
131
    /// Return the first item that was accepted for this section, or None
132
    /// if no items were accepted for this section.
133
2812
    pub(crate) fn first_item(&self) -> Option<&Item<'a, T>> {
134
2812
        match self.first {
135
            None => None,
136
2812
            Some(t) => self.tokval(t).first(),
137
        }
138
2812
    }
139
    /// Return the last item that was accepted for this section, or None
140
    /// if no items were accepted for this section.
141
1077
    pub(crate) fn last_item(&self) -> Option<&Item<'a, T>> {
142
1077
        match self.last {
143
            None => None,
144
1077
            Some(t) => self.tokval(t).last(),
145
        }
146
1077
    }
147
    /// Insert an `item`.
148
    ///
149
    /// The `item` must have parsed Keyword `t`.
150
59390
    fn add_tok(&mut self, t: T, item: Item<'a, T>) {
151
59390
        let idx = Keyword::idx(t);
152
59390
        if idx >= self.v.len() {
153
            self.v.resize(idx + 1, TokVal::none());
154
59390
        }
155
59390
        self.v[idx].0.push(item);
156
59390
        if self.first.is_none() {
157
9063
            self.first = Some(t);
158
50327
        }
159
59390
        self.last = Some(t);
160
59390
    }
161
}
162

            
163
/// A builder for a set of section rules.
164
#[derive(Clone)]
165
pub(crate) struct SectionRulesBuilder<T: Keyword> {
166
    /// Have we been told, explicitly, to reject unrecognized tokens?
167
    strict: bool,
168
    /// The rules we're building.
169
    rules: SectionRules<T>,
170
}
171

            
172
impl<T: Keyword> SectionRulesBuilder<T> {
173
    /// Add a rule to this SectionRulesBuilder, based on a TokenFmtBuilder.
174
    ///
175
    /// Requires that no rule yet exists for the provided keyword.
176
6152
    pub(crate) fn add(&mut self, t: TokenFmtBuilder<T>) {
177
6152
        let rule: TokenFmt<_> = t.into();
178
6152
        let idx = rule.kwd().idx();
179
6152
        assert!(self.rules.rules[idx].is_none());
180
6152
        self.rules.rules[idx] = Some(rule);
181
6152
    }
182

            
183
    /// Explicitly reject any unrecognized tokens.
184
    ///
185
    /// To avoid errors, you must either explicitly reject unrecognized tokens,
186
    /// or you must define how they are handled.
187
104
    pub(crate) fn reject_unrecognized(&mut self) {
188
104
        self.strict = true;
189
104
    }
190

            
191
    /// Construct the SectionRules from this builder.
192
    ///
193
    /// # Panics
194
    ///
195
    /// Panics if you did not specify the behavior for unrecognized tokens,
196
    /// using either `reject_unrecognized` or `add(UNRECOGNIZED.rule()...)`
197
833
    pub(crate) fn build(self) -> SectionRules<T> {
198
833
        let unrecognized_idx = T::unrecognized().idx();
199
833
        assert!(
200
833
            self.strict || self.rules.rules[unrecognized_idx].is_some(),
201
            "BUG: Section has to handle UNRECOGNIZED tokens explicitly."
202
        );
203
833
        self.rules
204
833
    }
205
}
206

            
207
impl<T: Keyword> SectionRules<T> {
208
    /// Create a new builder for a SectionRules with no rules.
209
    ///
210
    /// By default, no Keyword is allowed by this SectionRules.
211
831
    pub(crate) fn builder() -> SectionRulesBuilder<T> {
212
831
        let n = T::n_vals();
213
831
        let mut rules = Vec::with_capacity(n);
214
831
        rules.resize(n, None);
215
831
        SectionRulesBuilder {
216
831
            strict: false,
217
831
            rules: SectionRules { rules },
218
831
        }
219
831
    }
220

            
221
    /// Parse a stream of tokens into a Section object without (fully)
222
    /// verifying them.
223
    ///
224
    /// Some errors are detected early, but others only show up later
225
    /// when we validate more carefully.
226
9083
    fn parse_unverified<'a, I>(&self, tokens: I, section: &mut Section<'a, T>) -> Result<()>
227
9083
    where
228
9083
        I: Iterator<Item = Result<Item<'a, T>>>,
229
9083
    {
230
67661
        for item in tokens {
231
58682
            let item = item?;
232

            
233
58582
            let tok = item.kwd();
234
58582
            let tok_idx = tok.idx();
235
58582
            if let Some(rule) = &self.rules[tok_idx] {
236
                // we want this token.
237
58580
                assert!(rule.kwd() == tok);
238
58580
                section.add_tok(tok, item);
239
58580
                rule.check_multiplicity(section.v[tok_idx].as_slice())?;
240
            } else {
241
                // We don't have a rule for this token.
242
2
                return Err(EK::UnexpectedToken
243
2
                    .with_msg(tok.to_str())
244
2
                    .at_pos(item.pos()));
245
            }
246
        }
247
8979
        Ok(())
248
9083
    }
249

            
250
    /// Check whether the tokens in a section we've parsed conform to
251
    /// these rules.
252
9069
    fn validate(&self, s: &Section<'_, T>) -> Result<()> {
253
9069
        // These vectors are both generated from T::n_vals().
254
9069
        assert_eq!(s.v.len(), self.rules.len());
255

            
256
        // Iterate over every item, and make sure it matches the
257
        // corresponding rule.
258
234737
        for (rule, t) in self.rules.iter().zip(s.v.iter()) {
259
234737
            match rule {
260
                None => {
261
                    // We aren't supposed to have any of these.
262
158696
                    if t.count() > 0 {
263
                        unreachable!(
264
                            "This item should have been rejected earlier, in parse_unverified()"
265
                        );
266
158696
                    }
267
                }
268
76041
                Some(rule) => {
269
76041
                    // We're allowed to have this. Is the number right?
270
76041
                    rule.check_multiplicity(t.as_slice())?;
271
                    // The number is right. Check each individual item.
272
76035
                    for item in t.as_slice() {
273
59372
                        rule.check_item(item)?;
274
                    }
275
                }
276
            }
277
        }
278

            
279
9053
        Ok(())
280
9069
    }
281

            
282
    /// Check all the base64-encoded objects on a given keyword.
283
    ///
284
    /// We use this to validate objects on unrecognized items, since
285
    /// otherwise nothing would check that they are well-formed.
286
18106
    fn validate_objects(&self, s: &Section<'_, T>, kwd: T) -> Result<()> {
287
18106
        for item in s.slice(kwd).iter() {
288
2140
            let _ = item.obj_raw()?;
289
        }
290
18106
        Ok(())
291
18106
    }
292

            
293
    /// Parse a stream of tokens into a validated section.
294
9083
    pub(crate) fn parse<'a, I>(&self, tokens: I) -> Result<Section<'a, T>>
295
9083
    where
296
9083
        I: Iterator<Item = Result<Item<'a, T>>>,
297
9083
    {
298
9083
        let mut section = Section::new();
299
9083
        self.parse_unverified(tokens, &mut section)?;
300
8979
        self.validate(&section)?;
301
8963
        self.validate_objects(&section, T::unrecognized())?;
302
8963
        self.validate_objects(&section, T::ann_unrecognized())?;
303
8963
        Ok(section)
304
9083
    }
305
}
306

            
307
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
    use super::SectionRules;
    use crate::parse::keyword::Keyword;
    use crate::parse::macros::test::Fruit;
    use crate::parse::tokenize::{Item, NetDocReader};
    use crate::{Error, NetdocErrorKind as EK, Result};
    use once_cell::sync::Lazy;

    /// Section rules over the Fruit test keywords, shared by the tests
    /// in this module.
    static FRUIT_SALAD: Lazy<SectionRules<Fruit>> = Lazy::new(|| {
        use Fruit::*;
        let mut builder = SectionRules::builder();
        builder.add(ANN_TASTY.rule().required().args(1..=1));
        builder.add(ORANGE.rule().args(1..));
        builder.add(STONEFRUIT.rule().may_repeat());
        builder.add(GUAVA.rule().obj_optional());
        builder.add(LEMON.rule().no_args().obj_required());
        builder.reject_unrecognized();
        builder.build()
    });

    #[test]
    fn parse_section() -> Result<()> {
        use Fruit::*;
        let text = "\
@tasty yes
orange soda
cherry cobbler
cherry pie
plum compote
guava fresh from 7 trees
-----BEGIN GUAVA MANIFESTO-----
VGhlIGd1YXZhIGVtb2ppIGlzIG5vdCBjdXJyZW50bHkgc3VwcG9ydGVkIGluI
HVuaWNvZGUgMTMuMC4gTGV0J3MgZmlnaHQgYWdhaW5zdCBhbnRpLWd1YXZhIG
JpYXMu
-----END GUAVA MANIFESTO-----
lemon
-----BEGIN LEMON-----
8J+Niw==
-----END LEMON-----
";
        let reader: NetDocReader<'_, Fruit> = NetDocReader::new(text).unwrap();
        let sec = FRUIT_SALAD.parse(reader).unwrap();

        // A required keyword.
        let tasty = sec.required(ANN_TASTY)?;
        assert_eq!(tasty.arg(0), Some("yes"));

        // An optional keyword that is present exactly once.
        let orange = sec.get(ORANGE);
        assert!(orange.is_some());
        assert_eq!(orange.unwrap().args_as_str(), "soda");

        // A repeatable keyword.
        let stonefruit = sec.slice(STONEFRUIT);
        assert_eq!(stonefruit.len(), 3);
        let kwds: Vec<&str> = stonefruit.iter().map(Item::kwd_str).collect();
        assert_eq!(kwds, &["cherry", "cherry", "plum"]);

        assert_eq!(sec.maybe(GUAVA).args_as_str(), Some("fresh from 7 trees"));
        assert_eq!(sec.maybe(GUAVA).parse_arg::<u32>(2).unwrap(), Some(7));
        assert!(sec.maybe(GUAVA).parse_arg::<u32>(1).is_err());

        // Try the `obj` accessor.
        let manifesto = sec.get(GUAVA).unwrap().obj("GUAVA MANIFESTO").unwrap();
        assert_eq!(manifesto,
                   &b"The guava emoji is not currently supported in unicode 13.0. Let's fight against anti-guava bias."[..]);
        let orange_obj_err = sec
            .get(ORANGE)
            .unwrap()
            .obj("ORANGE MANIFESTO")
            .unwrap_err();
        assert!(matches!(
            orange_obj_err.netdoc_error_kind(),
            EK::MissingObject // orange you glad there isn't a manifesto?
        ));

        // Try `maybe_item` a bit.
        let maybe_banana = sec.maybe(BANANA);
        assert!(maybe_banana.parse_arg::<u32>(3).unwrap().is_none()); // yes! we have none.
        let maybe_guava = sec.maybe(GUAVA);
        assert_eq!(maybe_guava.parse_arg::<u32>(2).unwrap(), Some(7));

        // The first and last accepted items must be the very same
        // objects that the keyword lookups return.
        assert!(std::ptr::eq(
            sec.get(ANN_TASTY).unwrap(),
            sec.first_item().unwrap()
        ));
        assert!(std::ptr::eq(
            sec.get(LEMON).unwrap(),
            sec.last_item().unwrap()
        ));

        Ok(())
    }

    #[test]
    fn rejected() {
        use crate::Pos;

        /// Parse `s` with FRUIT_SALAD, and require that it fails with
        /// exactly the error `e`.
        fn check(s: &str, e: &Error) {
            let reader: NetDocReader<'_, Fruit> = NetDocReader::new(s).unwrap();
            let outcome = FRUIT_SALAD.parse(reader);
            assert!(outcome.is_err());
            let err = outcome.err().unwrap();
            assert_eq!(&err.within(s), e);
        }

        let cases: Vec<(&str, Error)> = vec![
            // unrecognized tokens aren't allowed here
            (
                "orange foo\nfoobar x\n@tasty yes\n",
                EK::UnexpectedToken
                    .with_msg("<unrecognized>")
                    .at_pos(Pos::from_line(2, 1)),
            ),
            // Only one orange per customer.
            (
                "@tasty yes\norange foo\norange bar\n",
                EK::DuplicateToken
                    .with_msg("orange")
                    .at_pos(Pos::from_line(3, 1)),
            ),
            // There needs to be a declaration of tastiness.
            ("orange foo\n", EK::MissingToken.with_msg("@tasty")),
            // You can't have an orange without an argument.
            (
                "@tasty nope\norange\n",
                EK::TooFewArguments
                    .with_msg("orange")
                    .at_pos(Pos::from_line(2, 1)),
            ),
            // You can't have more than one argument on "tasty".
            (
                "@tasty yup indeed\norange normal\n",
                EK::TooManyArguments
                    .with_msg("@tasty")
                    .at_pos(Pos::from_line(1, 1)),
            ),
            // Every lemon needs an object
            (
                "@tasty yes\nlemon\norange no\n",
                EK::MissingObject
                    .with_msg("lemon")
                    .at_pos(Pos::from_line(2, 1)),
            ),
            // oranges don't take an object.
            (
                "@tasty yes\norange no\n-----BEGIN ORANGE-----\naaa\n-----END ORANGE-----\n",
                EK::UnexpectedObject
                    .with_msg("orange")
                    .at_pos(Pos::from_line(2, 1)),
            ),
        ];

        for (input, expected) in &cases {
            check(input, expected);
        }
    }
}