feat(testing-framework): testing parser improvements (#375)

* Introduce multiline call annotations

Call annotations have the same format as `;`-annotations, but allow
multiline JSON literals.  They look like `#| ...  |#` (that's the Common Lisp syntax for multiline comments).

For example:

```
(call "peer_id" ("serv" "func") [a b] var)  #|
      map = {
        "0": null,
        "default": 42
      }
   |#
```

* A JSON value in the assertion parser doesn't have to be last

We had been using nom's `rest` for JSON values.  Now we have a simple JSON parser
that recognizes the JSON text to be parsed with `serde_json`, so there may be multiple JSON values
within the same assertion.

* Allow annotation after a compound form

It is attached to the rightmost nested element, which has to be a `call` special form.
This commit is contained in:
Ivan Boldyrev 2022-11-25 13:56:23 +03:00 committed by GitHub
parent becdedc364
commit 843d2a1d7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 514 additions and 34 deletions

View File

@ -0,0 +1,228 @@
/*
* Copyright 2022 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Based on the JSON parser from nom examples:
*
* Copyright (c) 2014-2019 Geoffroy Couprie
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
use nom::{
branch::alt,
bytes::complete::{escaped, tag, take_while},
character::complete::{char, one_of, satisfy},
combinator::{cut, map, opt, value},
error::{context, ContextError, ParseError},
multi::separated_list0,
number::complete::double,
sequence::{delimited, preceded, separated_pair, terminated},
IResult,
};
use std::collections::HashMap;
use std::str;
/// A JSON value as produced by the parsers in this module.
#[derive(Debug, PartialEq)]
pub enum JsonValue {
/// The `null` literal.
Null,
/// A string literal: the text found between the quotes, copied as written
/// (escape sequences are recognized but not decoded by this module).
Str(String),
/// The `true` / `false` literals.
Boolean(bool),
/// A number, parsed into an `f64` by nom's `double`.
Num(f64),
/// An array of values, in source order.
Array(Vec<JsonValue>),
/// An object: member name mapped to member value.
Object(HashMap<String, JsonValue>),
}
/// Whitespace skipper: consumes a (possibly empty) run of spaces, tabs,
/// carriage returns and line feeds and returns the consumed slice.
///
/// It is the smallest building block of this module; the larger parsers
/// below are assembled from it.
fn sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
    // `take_while` only builds the parser; applying it to `i` runs it.
    take_while(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))(i)
}
/// Recognizes the interior of a JSON string literal, i.e. the text between
/// the two double quotes (the quotes themselves are handled by `string`).
///
/// The returned `&str` is a sub-slice of the input: nothing is allocated and
/// escape sequences are left exactly as written, because decoding is the job
/// of whoever consumes the recognized text.
///
/// Accepted content:
/// - any character from U+0020 upwards except `\` and `"`;
/// - a backslash followed by one of the escape introducers of RFC 8259:
///   `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`, `u`.
///
/// For `\uXXXX` only the `u` is consumed as part of the escape; the four hex
/// digits that follow are ordinary characters for this recognizer, so their
/// validity is not checked here.
fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
    escaped(
        satisfy(|c| ('\u{20}'..='\u{10FFFF}').contains(&c) && (c != '\\') && (c != '"')),
        '\\',
        // The full JSON escape set; the previous `"`, `n`, `\` subset made
        // strings containing e.g. `\t` or `\u00e9` fail to parse.
        one_of("\"\\/bfnrtu"),
    )(i)
}
/// Parses the JSON literals `true` and `false` into a `bool`.
///
/// Each branch is a `tag` (matches the exact keyword) wrapped in `value`
/// (replaces the matched text with a constant). `alt` tries the branches in
/// order and yields the first success, or an error when neither matches.
fn boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, E> {
    alt((value(true, tag("true")), value(false, tag("false"))))(input)
}
fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> {
value((), tag("null"))(input)
}
/// Parses a complete JSON string literal and returns the text between the
/// quotes (see `parse_str` for what that text may contain).
///
/// Error handling:
/// - a missing opening quote is a recoverable error, so an enclosing `alt`
///   may still try its other branches;
/// - once the opening quote has been consumed, `cut` turns any later error
///   into a failure: we are certainly inside a string, so trying other
///   alternatives would only hide the real problem;
/// - `context` labels errors coming out of this parser with `"string"`.
fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, &'a str, E> {
    // Interior of the literal followed by the closing quote, with no way back.
    let closing = cut(terminated(parse_str, char('"')));
    let quoted = preceded(char('"'), closing);
    context("string", quoted)(i)
}
/// Parses a JSON array: `[`, zero or more comma-separated values, `]`.
///
/// `separated_list0` applies the element parser repeatedly and collects the
/// results into a `Vec` until the element or the separator stops matching.
/// After the opening bracket every error is promoted to a failure by `cut`,
/// and errors are labelled `"array"`.
fn array<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, Vec<JsonValue>, E> {
    // Both the separator and the closing bracket may be preceded by whitespace.
    let comma = preceded(sp, char(','));
    let closing_bracket = preceded(sp, char(']'));
    let elements = terminated(separated_list0(comma, json_value), closing_bracket);
    context("array", preceded(char('['), cut(elements)))(i)
}
/// Parses one object member: a string key, a `:`, then any JSON value.
///
/// Whitespace is allowed before the key and before the colon. A missing colon
/// after a successfully parsed key is a hard failure (`cut`).
fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, (&'a str, JsonValue), E> {
    let key = preceded(sp, string);
    let colon = cut(preceded(sp, char(':')));
    separated_pair(key, colon, json_value)(i)
}
/// Parses a JSON object: `{`, zero or more comma-separated `key: value`
/// members, `}`; the members are collected into a `HashMap` with owned keys.
///
/// After the opening brace every error is promoted to a failure by `cut`,
/// and errors are labelled `"map"`.
pub fn hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, HashMap<String, JsonValue>, E> {
    // Both the separator and the closing brace may be preceded by whitespace.
    let comma = preceded(sp, char(','));
    let closing_brace = preceded(sp, char('}'));
    let members = map(separated_list0(comma, key_value), |pairs| {
        pairs
            .into_iter()
            .map(|(key, val)| (key.to_owned(), val))
            .collect::<HashMap<_, _>>()
    });
    context(
        "map",
        preceded(char('{'), cut(terminated(members, closing_brace))),
    )(i)
}
/// Parses any JSON value, skipping leading whitespace first.
///
/// The alternatives are tried in this order: object, array, string, number,
/// boolean, `null`.
pub fn json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, JsonValue, E> {
    let any_value = alt((
        map(hash, JsonValue::Object),
        map(array, JsonValue::Array),
        map(string, |text: &str| JsonValue::Str(text.to_owned())),
        map(double, JsonValue::Num),
        map(boolean, JsonValue::Boolean),
        map(null, |()| JsonValue::Null),
    ));
    preceded(sp, any_value)(i)
}
/// Parses a top-level JSON document: an object, an array or `null`,
/// optionally surrounded by whitespace.
#[allow(dead_code)]
pub fn root<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, JsonValue, E> {
    let document = alt((
        map(hash, JsonValue::Object),
        map(array, JsonValue::Array),
        map(null, |()| JsonValue::Null),
    ));
    delimited(sp, document, opt(sp))(i)
}

View File

@ -14,6 +14,7 @@
* limitations under the License.
*/
mod json;
pub(crate) mod parser;
use crate::services::JValue;

View File

@ -40,28 +40,38 @@ impl FromStr for ServiceDefinition {
pub fn parse_kw(inp: &str) -> IResult<&str, ServiceDefinition, ParseError> {
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::combinator::{cut, map_res, rest};
use nom::character::complete::alphanumeric1;
use nom::combinator::{cut, map_res, recognize};
use nom::error::context;
use nom::sequence::separated_pair;
let equal = || delim_ws(tag("="));
let json_value = || {
cut(context(
"result value has to be a valid JSON",
recognize(super::json::json_value),
))
};
let json_map = || {
cut(context(
"result value has to be a valid JSON hash",
recognize(super::json::hash),
))
};
delim_ws(map_res(
separated_pair(
alt((
tag(ServiceTagName::Ok.as_ref()),
tag(ServiceTagName::Error.as_ref()),
tag(ServiceTagName::SeqOk.as_ref()),
tag(ServiceTagName::SeqError.as_ref()),
alt((
separated_pair(tag(ServiceTagName::Ok.as_ref()), equal(), json_value()),
separated_pair(tag(ServiceTagName::Error.as_ref()), equal(), json_map()),
separated_pair(tag(ServiceTagName::SeqOk.as_ref()), equal(), json_map()),
separated_pair(tag(ServiceTagName::SeqError.as_ref()), equal(), json_map()),
separated_pair(
tag(ServiceTagName::Behaviour.as_ref()),
tag(ServiceTagName::Map.as_ref()),
)),
equal(),
cut(context(
"result value is consumed to end and has to be a valid JSON",
rest,
)),
),
equal(),
cut(alphanumeric1),
),
separated_pair(tag(ServiceTagName::Map.as_ref()), equal(), json_map()),
)),
|(tag, value): (&str, &str)| {
let value = value.trim();
match ServiceTagName::from_str(tag) {
@ -221,4 +231,22 @@ mod tests {
}))
);
}
/// `parse_kw` stops at the end of its JSON value, so several definitions can
/// be chained with an ordinary separator parser.
#[test]
fn test_composable() {
    use nom::bytes::complete::tag;
    use nom::multi::separated_list1;

    let input = r#"ok={"ret_code": 0};map={"default": 42}"#;
    let expected = vec![
        ServiceDefinition::Ok(json!({"ret_code": 0})),
        ServiceDefinition::Map(maplit::hashmap! {"default".to_owned() => json!(42)}),
    ];

    let mut definitions = separated_list1(tag(";"), parse_kw);
    assert_eq!(definitions(input), Ok(("", expected)));
}
}

View File

@ -49,6 +49,27 @@ impl Sexp {
pub(crate) fn string(value: impl ToString) -> Self {
Self::String(value.to_string())
}
/// Attaches `service_definition` to the rightmost nested `call` of this
/// expression.
///
/// - A `Call` stores the definition in its `service_desc`, replacing any
///   previous value.
/// - A `List` forwards the definition to its last element, recursively.
///
/// # Errors
///
/// Returns a human-readable message when the definition cannot be attached:
/// the list is empty, or the element reached is a symbol or a string.
pub(crate) fn inject(&mut self, service_definition: ServiceDefinition) -> Result<(), String> {
    match self {
        Sexp::Call(call) => {
            call.service_desc = Some(service_definition);
            Ok(())
        }
        Sexp::List(list) => match list.last_mut() {
            Some(last) => last.inject(service_definition),
            None => Err("cannot attach a service definition to an empty list".to_owned()),
        },
        Sexp::Symbol(s) => Err(format!(
            "cannot attach a service definition to a symbol {:?}",
            s
        )),
        // `{:?}` already prints the surrounding quotes; adding literal quotes
        // around it would show the value double-quoted.
        Sexp::String(s) => Err(format!(
            "cannot attach a service definition to a string: {:?}",
            s
        )),
    }
}
}
impl std::fmt::Display for Sexp {

View File

@ -18,9 +18,9 @@ use super::{Call, Sexp, Triplet};
use crate::asserts::ServiceDefinition;
use nom::branch::alt;
use nom::bytes::complete::{is_not, tag};
use nom::bytes::complete::{is_not, tag, take_until};
use nom::character::complete::{alphanumeric1, multispace0, multispace1, one_of, space1};
use nom::combinator::{cut, map, map_res, opt, recognize, value};
use nom::combinator::{cut, map, map_parser, map_res, opt, recognize, rest, value};
use nom::error::{context, VerboseError, VerboseErrorKind};
use nom::multi::{many0, many1, many1_count, separated_list0};
use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
@ -88,13 +88,24 @@ fn parse_sexp_list(inp: Input<'_>) -> IResult<Input<'_>, Sexp, ParseError<'_>> {
"within generic list",
preceded(
terminated(tag("("), sexp_multispace0),
cut(terminated(
map(separated_list0(sexp_multispace1, parse_sexp), Sexp::list),
preceded(
sexp_multispace0,
context("closing parentheses not found", tag(")")),
),
)),
map_res(
cut(pair(
map(separated_list0(sexp_multispace1, parse_sexp), Sexp::list),
preceded(
preceded(
sexp_multispace0,
context("closing parentheses not found", tag(")")),
),
parse_annotation_comment,
),
)),
|(mut sexp, annotation)| {
if let Some(service_definition) = annotation {
sexp.inject(service_definition)?;
}
Ok::<_, String>(sexp)
},
),
),
)(inp)
}
@ -156,10 +167,7 @@ fn parse_sexp_call_content(inp: Input<'_>) -> IResult<Input<'_>, Sexp, ParseErro
opt(preceded(sexp_multispace1, map(parse_sexp_symbol, Box::new))),
preceded(sexp_multispace0, tag(")")),
),
alt((
opt(preceded(pair(space1, tag("; ")), parse_annotation)),
value(None, sexp_multispace0),
)),
parse_annotation_comment,
),
),
|((triplet, args), (var, annotation))| {
@ -173,12 +181,53 @@ fn parse_sexp_call_content(inp: Input<'_>) -> IResult<Input<'_>, Sexp, ParseErro
)(inp)
}
fn parse_annotation(inp: Input<'_>) -> IResult<Input<'_>, ServiceDefinition, ParseError<'_>> {
map_res(
is_not("\r\n"),
|span: Input<'_>| -> Result<ServiceDefinition, ParseError<'_>> {
Ok(ServiceDefinition::from_str(&span).unwrap())
},
/// Parses the optional service annotation that may follow a form.
///
/// The alternatives are tried in this order:
/// 1. one or more spaces/tabs, `; `, then a single-line annotation;
/// 2. one or more spaces/tabs, `#|`, the annotation text up to the first
///    `|#`, then the closing `|#`;
/// 3. nothing: succeeds with `None` without consuming any input.
///
/// Once an opening marker (`; ` or `#|`) has been consumed, the annotation is
/// wrapped in `cut`, so a problem inside it is reported as a failure instead
/// of silently falling through to the "no annotation" alternative.
fn parse_annotation_comment(
    inp: Input<'_>,
) -> IResult<Input<'_>, Option<ServiceDefinition>, ParseError<'_>> {
    use nom::combinator::success;

    let singleline = preceded(
        pair(space1, tag("; ")),
        map(cut(parse_singleline_annotation), Some),
    );

    // The text before `|#` is carved out first and then parsed on its own.
    let multiline_body = map_parser(take_until("|#"), parse_multiline_annotation);
    let multiline = delimited(
        pair(space1, tag("#|")),
        map(cut(multiline_body), Some),
        tag("|#"),
    );

    alt((singleline, multiline, success(None)))(inp)
}
/// Parses a `;`-style annotation: everything up to (not including) the next
/// `\r` or `\n` is converted with `ServiceDefinition::from_str`.
///
/// # Errors
///
/// A text that is not a valid service definition is reported as a parse
/// error labelled `"single-line annotation"` rather than aborting the process
/// with a panic. How much of the conversion message survives in the final
/// error depends on how `ParseError` implements `FromExternalError`.
fn parse_singleline_annotation(
    inp: Input<'_>,
) -> IResult<Input<'_>, ServiceDefinition, ParseError<'_>> {
    context(
        "single-line annotation",
        map_res(is_not("\r\n"), |span: Input<'_>| {
            // Hand the failure to `map_res` instead of panicking on bad input.
            ServiceDefinition::from_str(&span)
                .map_err(|e| format!("invalid service definition: {:?}", e))
        }),
    )(inp)
}
/// Parses the body of a `#| ... |#` annotation: the whole remaining input is
/// converted with `ServiceDefinition::from_str`.
///
/// The caller is expected to pass only the text between the markers.
///
/// # Errors
///
/// A text that is not a valid service definition is reported as a parse
/// error labelled `"multiline annotation"` rather than aborting the process
/// with a panic. How much of the conversion message survives in the final
/// error depends on how `ParseError` implements `FromExternalError`.
fn parse_multiline_annotation(
    inp: Input<'_>,
) -> IResult<Input<'_>, ServiceDefinition, ParseError<'_>> {
    context(
        "multiline annotation",
        // `rest` already yields the entire remaining input as its output.
        map_res(rest, |span: Input<'_>| {
            // Hand the failure to `map_res` instead of panicking on bad input.
            ServiceDefinition::from_str(&span)
                .map_err(|e| format!("invalid service definition: {:?}", e))
        }),
    )(inp)
}
@ -669,4 +718,157 @@ mod tests {
])]))
);
}
/// A `#| ... |#` annotation whose JSON spans several lines is accepted.
#[test]
fn test_annotation_multiline() {
    let source = r#" #|
map = {
"0": null
} |#"#;

    let outcome = parse_annotation_comment(source.into());
    assert!(outcome.is_ok(), "{:?}", outcome);
}
/// A multiline annotation placed right after a `call` ends up in that call's
/// `service_desc`.
#[test]
fn test_annotation_multiline_with_call() {
    let source = r#"(call "peer_id" ("serv" "func") [a b] var) #|
map = {
"0": null
}
|#"#;

    let annotation = ServiceDefinition::Map(maplit::hashmap! {
        "0".to_owned() => json!(null),
    });
    let expected = Sexp::Call(Call {
        triplet: Box::new((
            Sexp::string("peer_id"),
            Sexp::string("serv"),
            Sexp::string("func"),
        )),
        args: vec![Sexp::symbol("a"), Sexp::symbol("b")],
        var: Some(Box::new(Sexp::symbol("var"))),
        service_desc: Some(annotation),
    });

    assert_eq!(Sexp::from_str(source), Ok(expected));
}
/// A multiline annotation belongs to the call it follows; the next call in
/// the same list stays without a service definition.
#[test]
fn test_annotation_multiline_with_many_calls() {
    let source = r#"(seq
(call "peer_id" ("serv" "func") [a b] var) #|
map = {
"0": null
}
|#
(call "peer_id" ("serv" "func") [a b] var)
)"#;

    // Both calls are identical apart from the attached service definition.
    let call_with = |service_desc: Option<ServiceDefinition>| {
        Sexp::Call(Call {
            triplet: Box::new((
                Sexp::string("peer_id"),
                Sexp::string("serv"),
                Sexp::string("func"),
            )),
            args: vec![Sexp::symbol("a"), Sexp::symbol("b")],
            var: Some(Box::new(Sexp::symbol("var"))),
            service_desc,
        })
    };
    let annotation = ServiceDefinition::Map(maplit::hashmap! {
        "0".to_owned() => json!(null),
    });
    let expected = Sexp::List(vec![
        Sexp::symbol("seq"),
        call_with(Some(annotation)),
        call_with(None),
    ]);

    assert_eq!(Sexp::from_str(source), Ok(expected));
}
/// A `;` annotation written after a compound form is attached to the
/// rightmost nested `call`, not to the first one.
#[test]
fn test_call_with_annotation_last_form() {
    let source = r#"(par
(call peerid ("serv" "func") [a b] var)
(call peerid2 ("serv" "func") [])) ; ok=42
"#;

    let first_call = Sexp::Call(Call {
        triplet: Box::new((
            Sexp::symbol("peerid"),
            Sexp::string("serv"),
            Sexp::string("func"),
        )),
        args: vec![Sexp::symbol("a"), Sexp::symbol("b")],
        var: Some(Box::new(Sexp::symbol("var"))),
        service_desc: None,
    });
    let annotated_call = Sexp::Call(Call {
        triplet: Box::new((
            Sexp::symbol("peerid2"),
            Sexp::string("serv"),
            Sexp::string("func"),
        )),
        args: vec![],
        var: None,
        service_desc: Some(ServiceDefinition::Ok(json!(42))),
    });
    let expected = Sexp::List(vec![Sexp::symbol("par"), first_call, annotated_call]);

    assert_eq!(Sexp::from_str(source), Ok(expected));
}
/// A `#| ... |#` annotation written after a compound form is attached to the
/// rightmost nested `call`, not to the first one.
#[test]
fn test_call_with_annotation_last_form_multiline() {
    let source = r#"(par
(call peerid ("serv" "func") [a b] var)
(call peerid2 ("serv" "func") [])) #|
ok=42
|#
"#;

    let first_call = Sexp::Call(Call {
        triplet: Box::new((
            Sexp::symbol("peerid"),
            Sexp::string("serv"),
            Sexp::string("func"),
        )),
        args: vec![Sexp::symbol("a"), Sexp::symbol("b")],
        var: Some(Box::new(Sexp::symbol("var"))),
        service_desc: None,
    });
    let annotated_call = Sexp::Call(Call {
        triplet: Box::new((
            Sexp::symbol("peerid2"),
            Sexp::string("serv"),
            Sexp::string("func"),
        )),
        args: vec![],
        var: None,
        service_desc: Some(ServiceDefinition::Ok(json!(42))),
    });
    let expected = Sexp::List(vec![Sexp::symbol("par"), first_call, annotated_call]);

    assert_eq!(Sexp::from_str(source), Ok(expected));
}
}