From 5402da7da9891e8ccb5ea2926fcb1b41e4a9a9f1 Mon Sep 17 00:00:00 2001 From: freestrings Date: Sat, 7 Mar 2020 22:48:21 +0900 Subject: [PATCH] First commit of filter 'in' operator --- src/parser/mod.rs | 79 ++++++++++++++++++++++++++++++------ src/parser/tokenizer.rs | 8 ++-- src/select/mod.rs | 86 ++++++++++++++++++++++++++++++++++++---- tests/extended_filter.rs | 34 ++++++++++++++++ 4 files changed, 183 insertions(+), 24 deletions(-) create mode 100644 tests/extended_filter.rs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 174a5d0..6d05b76 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -64,6 +64,7 @@ pub enum FilterToken { LittleOrEqual, Greater, GreaterOrEqual, + In, And, Or, } @@ -487,6 +488,9 @@ impl Parser { | Ok(Token::LittleOrEqual(_)) | Ok(Token::Greater(_)) | Ok(Token::GreaterOrEqual(_)) => true, + Ok(Token::Key(_, key)) => { + Self::get_filter_token(key).is_some() + } _ => false, } { Self::op(node, tokenizer) @@ -527,6 +531,41 @@ impl Parser { } } + fn term_array(tokenizer: &mut TokenReader) -> ParseResult { + debug!("#term_array"); + + Self::eat_token(tokenizer); + Self::eat_whitespace(tokenizer); + + let mut keys = vec![]; + loop { + match tokenizer.next_token() { + Ok(Token::SingleQuoted(_, val)) | Ok(Token::DoubleQuoted(_, val)) => { + keys.push(val); + } + Ok(Token::Key(_, val)) => { + keys.push(val); + } + _ => return Err(tokenizer.err_msg()), + } + + Self::eat_whitespace(tokenizer); + + match tokenizer.peek_token() { + Ok(Token::Comma(_)) => { + Self::eat_token(tokenizer); + Self::eat_whitespace(tokenizer); + } + Ok(Token::CloseArray(_)) => break, + _ => {} + } + } + + Self::eat_whitespace(tokenizer); + + Self::close_token(Self::node(ParseToken::Keys(keys)), Token::CloseArray(DUMMY), tokenizer) + } + fn term(tokenizer: &mut TokenReader) -> ParseResult { debug!("#term"); @@ -545,16 +584,19 @@ impl Parser { } Ok(Token::Absolute(_)) => { Self::json_path(tokenizer) - }, + } Ok(Token::DoubleQuoted(_, _)) | Ok(Token::SingleQuoted(_, _)) => { Self::array_quote_value(tokenizer) - }, + } Ok(Token::Key(_, key)) => { match key.as_bytes()[0] { b'-' | b'0'..=b'9' => Self::term_num(tokenizer), _ => Self::boolean(tokenizer), } } + Ok(Token::OpenArray(_)) => { + Self::term_array(tokenizer) + } _ => { Err(tokenizer.err_msg()) } @@ -570,6 +612,10 @@ impl Parser { Ok(Token::LittleOrEqual(_)) => ParseToken::Filter(FilterToken::LittleOrEqual), Ok(Token::Greater(_)) => ParseToken::Filter(FilterToken::Greater), Ok(Token::GreaterOrEqual(_)) => ParseToken::Filter(FilterToken::GreaterOrEqual), + Ok(Token::Key(_, key)) => match Self::get_filter_token(&key) { + Some(filter_token) => ParseToken::Filter(filter_token), + _ => return Err(tokenizer.err_msg()) + } _ => { return Err(tokenizer.err_msg()); } @@ -609,10 +655,19 @@ impl Parser { _ => Err(tokenizer.err_msg()), } } + + fn get_filter_token(op_candidate: &str) -> Option { + match op_candidate { + "in" | "In" | "iN" | "IN" => Some(FilterToken::In), + _ => None + } + } } pub trait NodeVisitor { - fn visit(&mut self, node: &Node) { + fn visit(&mut self, node: &Node, indent: usize, prefix: &str) { + debug!("{:indent$}{} {:?}", "", prefix, node.token, indent = indent); + match &node.token { ParseToken::Absolute | ParseToken::Relative @@ -627,48 +682,48 @@ pub trait NodeVisitor { } ParseToken::In | ParseToken::Leaves => { if let Some(n) = &node.left { - self.visit(&*n); + self.visit(&*n, indent + 1, "1.LEFT"); } self.visit_token(&node.token); if let Some(n) = &node.right { - self.visit(&*n); + self.visit(&*n, indent + 1, "1.RIGHT"); } } ParseToken::Array => { if let Some(n) = &node.left { - self.visit(&*n); + self.visit(&*n, indent + 1, "2.LEFT"); } self.visit_token(&node.token); if let Some(n) = &node.right { - self.visit(&*n); + self.visit(&*n, indent + 1, "2.RIGHT"); } self.visit_token(&ParseToken::ArrayEof); } ParseToken::Filter(FilterToken::And) | ParseToken::Filter(FilterToken::Or) => { if let Some(n) = &node.left { - self.visit(&*n); + self.visit(&*n, indent + 1, "3.LEFT"); } if let Some(n) = &node.right { - self.visit(&*n); + self.visit(&*n, indent + 1, "3.RIGTH"); } self.visit_token(&node.token); } ParseToken::Filter(_) => { if let Some(n) = &node.left { - self.visit(&*n); + self.visit(&*n, indent + 1, "4.LEFT"); } self.end_term(); if let Some(n) = &node.right { - self.visit(&*n); + self.visit(&*n, indent + 1, "4.RIGTH"); } self.end_term(); @@ -702,7 +757,7 @@ mod parser_tests { fn start(&mut self) -> Result, String> { let node = Parser::compile(self.input)?; - self.visit(&node); + self.visit(&node, 0, "-"); Ok(self.stack.split_off(0)) } } diff --git a/src/parser/tokenizer.rs b/src/parser/tokenizer.rs index 3b3215a..7981b77 100644 --- a/src/parser/tokenizer.rs +++ b/src/parser/tokenizer.rs @@ -371,11 +371,11 @@ impl<'a> TokenReader<'a> { pub fn peek_token(&self) -> Result<&Token, TokenError> { match self.tokens.last() { Some((_, t)) => { - trace!("%{:?}", t); + // trace!("[PEEK].{:?}", t); Ok(t) } _ => { - trace!("%{:?}", self.err); + // trace!("[PEEK]!{:?}", self.err); Err(self.err.clone()) } } @@ -385,11 +385,11 @@ impl<'a> TokenReader<'a> { match self.tokens.pop() { Some((pos, t)) => { self.curr_pos = Some(pos); - trace!("@{:?}", t); + trace!("[NEXT].{:?}", t); Ok(t) } _ => { - trace!("@{:?}", self.err); + trace!("[NEXT]!{:?}", self.err); Err(self.err.clone()) } } diff --git a/src/select/mod.rs b/src/select/mod.rs index 142b973..26cb36e 100644 --- a/src/select/mod.rs +++ b/src/select/mod.rs @@ -2,8 +2,8 @@ use std::collections::HashSet; use std::fmt; use array_tool::vec::{Intersect, Union}; -use serde_json::map::Entry; use serde_json::{Number, Value}; +use serde_json::map::Entry; use parser::*; @@ -31,6 +31,14 @@ trait Cmp { } } +trait CmpRight { + fn cmp<'a>(&self, v1: &[&'a Value], v2: &[String]) -> Vec<&'a Value>; + + fn default(&self) -> bool { + false + } +} + struct CmpEq; impl Cmp for CmpEq { @@ -191,11 +199,40 @@ impl Cmp for CmpOr { } } +struct CmpIn; + +impl CmpRight for CmpIn { + fn cmp<'a>(&self, v1: &[&'a Value], v2: &[String]) -> Vec<&'a Value> { + v1.iter() + .filter(|v| match v { + Value::Object(map) => { + for value in map.values() { + if match value { + Value::String(s) => v2.contains(&s), + _ => false + } { + return true; + } + } + + false + } + Value::String(s) => { + v2.contains(s) + } + _ => false + }) + .map(|v| *v) + .collect() + } +} + #[derive(Debug, PartialEq)] enum ExprTerm<'a> { String(String), Number(Number), Bool(bool), + Array(Vec), Json(Option>, Option, Vec<&'a Value>), } @@ -222,6 +259,9 @@ impl<'a> ExprTerm<'a> { ExprTerm::Json(_, _, _) => other.cmp(&self, reverse_cmp_fn, cmp_fn), _ => ExprTerm::Bool(cmp_fn.default()), }, + ExprTerm::Array(_) => { + unreachable!("#ExprTerm::Array, - unreachable!!"); + } ExprTerm::Json(rel, fk1, vec1) => { let ret: Vec<&Value> = match &other { ExprTerm::String(s2) => vec1 @@ -272,6 +312,9 @@ impl<'a> ExprTerm<'a> { }) .cloned() .collect(), + ExprTerm::Array(_) => { + unreachable!("#ExprTerm::Json, ExprTerm::Array unreachable!!"); + } ExprTerm::Json(parent, _, vec2) => { if let Some(vec1) = rel { cmp_fn.cmp_json(vec1, vec2) @@ -294,6 +337,23 @@ impl<'a> ExprTerm<'a> { } } + fn cmp_right(&self, other: &Self, cmp_fn: &C) -> ExprTerm<'a> { + match &self { + ExprTerm::Json(rel, _, vec) => { + if let ExprTerm::Array(keys) = &other { + if let Some(parent) = rel { + ExprTerm::Json(Some(parent.to_vec()), None, cmp_fn.cmp(&vec, keys)) + } else { + ExprTerm::Json(None, None, cmp_fn.cmp(&vec, keys)) + } + } else { + unreachable!("#cmp_right") + } + } + _ => ExprTerm::Bool(cmp_fn.default()) + } + } + fn eq(&self, other: &Self, ret: &mut Option>) { debug!("eq - {:?} : {:?}", &self, &other); let _ = ret.take(); @@ -342,6 +402,14 @@ impl<'a> ExprTerm<'a> { *ret = Some(tmp); } + fn inn(&self, other: &Self, ret: &mut Option>) { + debug!("in - {:?} : {:?}", &self, &other); + let _ = ret.take(); + let tmp = self.cmp_right(other, &CmpIn); + debug!("in = {:?}", tmp); + *ret = Some(tmp); + } + fn and(&self, other: &Self, ret: &mut Option>) { debug!("and - {:?} : {:?}", &self, &other); let _ = ret.take(); @@ -529,7 +597,7 @@ impl<'a, 'b> Selector<'a, 'b> { fn _select(&mut self) -> Result<(), JsonPathError> { if self.node_ref.is_some() { let node_ref = self.node_ref.take().unwrap(); - self.visit(node_ref); + self.visit(node_ref, 0, "-"); return Ok(()); } @@ -538,7 +606,7 @@ impl<'a, 'b> Selector<'a, 'b> { } let node = self.node.take().unwrap(); - self.visit(&node); + self.visit(&node, 0, "-"); self.node = Some(node); Ok(()) @@ -954,7 +1022,9 @@ impl<'a, 'b> Selector<'a, 'b> { fn visit_keys(&mut self, keys: &[String]) { if !self.terms.is_empty() { - unimplemented!("keys in filter"); + debug!("\t - array in key {:?}", self.tokens); + self.terms.push(Some(ExprTerm::Array(keys.to_vec()))); + return; } if let Some(ParseToken::Array) = self.tokens.pop() { @@ -964,7 +1034,7 @@ impl<'a, 'b> Selector<'a, 'b> { } } - fn visit_filter(&mut self, ft: &FilterToken) { + fn visit_filter(&mut self, filter_token: &FilterToken) { let right = match self.terms.pop() { Some(Some(right)) => right, Some(None) => ExprTerm::Json( @@ -992,13 +1062,14 @@ impl<'a, 'b> Selector<'a, 'b> { }; let mut ret = None; - match ft { + match filter_token { FilterToken::Equal => left.eq(&right, &mut ret), FilterToken::NotEqual => left.ne(&right, &mut ret), FilterToken::Greater => left.gt(&right, &mut ret), FilterToken::GreaterOrEqual => left.ge(&right, &mut ret), FilterToken::Little => left.lt(&right, &mut ret), FilterToken::LittleOrEqual => left.le(&right, &mut ret), + FilterToken::In => left.inn(&right, &mut ret), FilterToken::And => left.and(&right, &mut ret), FilterToken::Or => left.or(&right, &mut ret), }; @@ -1095,8 +1166,7 @@ impl<'a, 'b> NodeVisitor for Selector<'a, 'b> { ParseToken::Key(key) => self.visit_key(key), ParseToken::Keys(keys) => self.visit_keys(keys), ParseToken::Number(v) => { - self.terms - .push(Some(ExprTerm::Number(Number::from_f64(*v).unwrap()))); + self.terms.push(Some(ExprTerm::Number(Number::from_f64(*v).unwrap()))); } ParseToken::Filter(ref ft) => self.visit_filter(ft), ParseToken::Range(from, to, step) => self.visit_range(from, to, step), diff --git a/tests/extended_filter.rs b/tests/extended_filter.rs new file mode 100644 index 0000000..8fb09c2 --- /dev/null +++ b/tests/extended_filter.rs @@ -0,0 +1,34 @@ +#[macro_use] +extern crate serde_json; + +use common::{select_and_then_compare, setup}; + +mod common; + +#[test] +fn extended_filter_in() { + setup(); + + select_and_then_compare( + "$..[?(@.size in ['M', 'L', 0])]", + json!({ + "red" : { + "size": "M" + }, + "blue" : { + "size" : "L" + }, + "yellow" : { + "size" : "XL" + } + }), + json!([ + { + "size" : "M" + }, + { + "size" : "L" + } + ]), + ); +} \ No newline at end of file