-
Notifications
You must be signed in to change notification settings - Fork 101
fix(datadog search): Fix improper handling of conjunctions by the datadog query parser #1542
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
595a4a3
e82fc30
330b6a2
6f62a5a
4901a11
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| fix handling of OR conjunctions in the datadog search query parser |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,9 +3,9 @@ use itertools::Itertools; | |
| use pest::iterators::Pair; | ||
| use pest_derive::Parser; | ||
|
|
||
| use super::node::{ | ||
| BooleanBuilder, Comparison, ComparisonValue, LuceneClause, LuceneOccur, QueryNode, Range, | ||
| }; | ||
| use crate::datadog_search_syntax::BooleanType; | ||
|
|
||
| use super::node::{Comparison, ComparisonValue, QueryNode, Range}; | ||
|
|
||
| #[derive(Debug, Parser)] | ||
| #[grammar = "src/datadog/search/grammar.pest"] | ||
|
|
@@ -21,6 +21,19 @@ const MISSING_FIELD: &str = "_missing_"; | |
| /// via a Visitor pattern and walking our way through the syntax tree. | ||
| pub struct QueryVisitor; | ||
|
|
||
| /// Group a list of nodes into a single node, using the given conjunction. | ||
| /// If the group has only one node, return it as-is. | ||
| fn group_nodes(mut node_group: Vec<QueryNode>, conjunction: BooleanType) -> QueryNode { | ||
| if node_group.len() == 1 { | ||
| return node_group.pop().unwrap(); | ||
| } | ||
|
|
||
| QueryNode::Boolean { | ||
| oper: conjunction, | ||
| nodes: node_group, | ||
| } | ||
| } | ||
|
|
||
| impl QueryVisitor { | ||
| pub fn visit_queryroot(token: Pair<Rule>, default_field: &str) -> QueryNode { | ||
| let contents = token.into_inner().next().unwrap(); | ||
|
|
@@ -33,32 +46,28 @@ impl QueryVisitor { | |
|
|
||
| fn visit_query(token: Pair<Rule>, default_field: &str) -> QueryNode { | ||
| let contents = token.into_inner(); | ||
| let mut clauses: Vec<LuceneClause> = Vec::new(); | ||
| let mut modifier: Option<LuceneOccur> = None; | ||
| let mut is_not: bool = false; | ||
|
|
||
| // AND takes precedence over OR. | ||
| // We will combine each consecutive clause in an AND group, | ||
| // and create a new and_group every time we encounter an OR. | ||
| // Finally, we will combine all the and_groups with OR. | ||
|
|
||
| let mut and_groups: Vec<QueryNode> = Vec::new(); | ||
|
|
||
| let mut and_group: Vec<QueryNode> = Vec::new(); | ||
|
|
||
| for node in contents { | ||
| let clause: Option<LuceneClause> = match node.as_rule() { | ||
| let query_node: Option<QueryNode> = match node.as_rule() { | ||
| Rule::multiterm => Some(Self::visit_multiterm(node, default_field)), | ||
| Rule::conjunction => { | ||
| let inner = node.into_inner().next().unwrap(); | ||
| match inner.as_rule() { | ||
| Rule::AND => { | ||
| // If our conjunction is AND and the previous clause was | ||
| // just a SHOULD, we make the previous clause a MUST and | ||
| // our new clause will also be a MUST | ||
| let lastitem = clauses.last_mut().unwrap(); | ||
| if let LuceneOccur::Should = lastitem.occur { | ||
| lastitem.occur = LuceneOccur::Must; | ||
| }; | ||
| } | ||
| Rule::AND => (), | ||
| Rule::OR => { | ||
| // If our conjunction is OR and the previous clause was | ||
| // a MUST, we make the previous clause a SHOULD and our | ||
| // new clause will also be a SHOULD | ||
| let lastitem = clauses.last_mut().unwrap(); | ||
| if let LuceneOccur::Must = lastitem.occur { | ||
| lastitem.occur = LuceneOccur::Should; | ||
| }; | ||
| modifier.get_or_insert(LuceneOccur::Should); | ||
| // close the current and_group and create a new one | ||
| and_groups.push(group_nodes(and_group, BooleanType::And)); | ||
bruceg marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| and_group = Vec::new(); | ||
bruceg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| _ => unreachable!(), | ||
| }; | ||
|
|
@@ -69,77 +78,42 @@ impl QueryVisitor { | |
| match inner.as_rule() { | ||
| Rule::PLUS => (), | ||
| Rule::NOT => { | ||
| modifier = Some(LuceneOccur::MustNot); | ||
| is_not = true; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need to toggle? i.e.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like, a query containing "NOT NOT"? I just checked, the lexer does not allow it (it fails on "NOT -foo" and interprets NOT NOT foo as NOT "NOT" foo)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That still makes me nervous, given the prevalence of double-negation in other such expression parsers, but it's good to know it's at least not wrong. |
||
| } | ||
| _ => unreachable!(), | ||
| }; | ||
| None | ||
| } | ||
| Rule::clause => { | ||
| let query_node = Self::visit_clause(node, default_field); | ||
| Some(LuceneClause { | ||
| occur: modifier.take().unwrap_or(LuceneOccur::Must), | ||
| node: query_node, | ||
| }) | ||
| } | ||
| Rule::clause => Some(Self::visit_clause(node, default_field)), | ||
| _ => unreachable!(), | ||
| }; | ||
| // If we found a clause to add to our list, add it | ||
| if let Some(c) = clause { | ||
| clauses.push(c); | ||
| } | ||
| } | ||
| if clauses.len() == 1 { | ||
| let single = clauses.pop().unwrap(); | ||
| match single { | ||
| LuceneClause { | ||
| occur: LuceneOccur::MustNot, | ||
| node: QueryNode::MatchAllDocs, | ||
| } => QueryNode::MatchNoDocs, | ||
| // I hate Boxing! Every allocation is a personal failing :( | ||
| LuceneClause { | ||
| occur: LuceneOccur::MustNot, | ||
| node, | ||
| } => QueryNode::NegatedNode { | ||
| node: Box::new(node), | ||
| }, | ||
| LuceneClause { occur: _, node } => node, | ||
| } | ||
| } else { | ||
| let mut and_builder = BooleanBuilder::and(); | ||
| let mut or_builder = BooleanBuilder::or(); | ||
| let (mut has_must, mut has_must_not, mut has_should) = (false, false, false); | ||
| for c in clauses { | ||
| let LuceneClause { node, occur } = c; | ||
| match occur { | ||
| LuceneOccur::Must => { | ||
| and_builder.add_node(node); | ||
| has_must = true; | ||
| } | ||
| LuceneOccur::MustNot => { | ||
| and_builder.add_node(QueryNode::NegatedNode { | ||
| node: Box::new(node), | ||
| }); | ||
| has_must_not = true; | ||
| } | ||
| LuceneOccur::Should => { | ||
| or_builder.add_node(node); | ||
| has_should = true; | ||
| } | ||
| if let Some(mut n) = query_node { | ||
| if is_not { | ||
| is_not = false; | ||
bruceg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| n = QueryNode::NegatedNode { node: Box::new(n) } | ||
| } | ||
|
|
||
| and_group.push(n); | ||
| } | ||
| if has_must || !has_should { | ||
| and_builder.build() | ||
| } else if !has_must_not { | ||
| or_builder.build() | ||
| } else { | ||
| and_builder.add_node(or_builder.build()); | ||
| and_builder.build() | ||
| } | ||
|
|
||
| and_groups.push(group_nodes(and_group, BooleanType::And)); | ||
| let query_node = group_nodes(and_groups, BooleanType::Or); | ||
|
|
||
| if let QueryNode::NegatedNode { node } = query_node { | ||
| // if the node is a negated MatchAllDocs, return MatchNoDocs | ||
| if let QueryNode::MatchAllDocs = *node { | ||
| return QueryNode::MatchNoDocs; | ||
| } | ||
| return QueryNode::NegatedNode { node }; | ||
| } | ||
|
|
||
| query_node | ||
| } | ||
|
|
||
| fn visit_multiterm(token: Pair<Rule>, default_field: &str) -> LuceneClause { | ||
| fn visit_multiterm(token: Pair<Rule>, default_field: &str) -> QueryNode { | ||
| let contents = token.into_inner(); | ||
| let mut terms: Vec<String> = Vec::new(); | ||
| for node in contents { | ||
|
|
@@ -149,12 +123,9 @@ impl QueryVisitor { | |
| _ => unreachable!(), | ||
| } | ||
| } | ||
| LuceneClause { | ||
| occur: LuceneOccur::Must, | ||
| node: QueryNode::AttributeTerm { | ||
| attr: String::from(default_field), | ||
| value: terms.join(" "), | ||
| }, | ||
| QueryNode::AttributeTerm { | ||
| attr: String::from(default_field), | ||
| value: terms.join(" "), | ||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.