Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1542.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fix handling of OR conjunctions in the datadog search query parser
143 changes: 57 additions & 86 deletions src/datadog/search/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ use itertools::Itertools;
use pest::iterators::Pair;
use pest_derive::Parser;

use super::node::{
BooleanBuilder, Comparison, ComparisonValue, LuceneClause, LuceneOccur, QueryNode, Range,
};
use crate::datadog_search_syntax::BooleanType;

use super::node::{Comparison, ComparisonValue, QueryNode, Range};

#[derive(Debug, Parser)]
#[grammar = "src/datadog/search/grammar.pest"]
Expand All @@ -21,6 +21,19 @@ const MISSING_FIELD: &str = "_missing_";
/// via a Visitor pattern and walking our way through the syntax tree.
pub struct QueryVisitor;

/// Group a list of nodes into a single node, using the given conjunction.
/// If the group has only one node, return it as-is.
fn group_nodes(mut node_group: Vec<QueryNode>, conjunction: BooleanType) -> QueryNode {
if node_group.len() == 1 {
return node_group.pop().unwrap();
}

QueryNode::Boolean {
oper: conjunction,
nodes: node_group,
}
}

impl QueryVisitor {
pub fn visit_queryroot(token: Pair<Rule>, default_field: &str) -> QueryNode {
let contents = token.into_inner().next().unwrap();
Expand All @@ -33,32 +46,28 @@ impl QueryVisitor {

fn visit_query(token: Pair<Rule>, default_field: &str) -> QueryNode {
let contents = token.into_inner();
let mut clauses: Vec<LuceneClause> = Vec::new();
let mut modifier: Option<LuceneOccur> = None;
let mut is_not: bool = false;

// AND takes precedence over OR.
// We will combine each consecutive clause in an AND group,
// and create a new and_group every time we encounter an OR.
// Finally, we will combine all the and_groups with OR.

let mut and_groups: Vec<QueryNode> = Vec::new();

let mut and_group: Vec<QueryNode> = Vec::new();

for node in contents {
let clause: Option<LuceneClause> = match node.as_rule() {
let query_node: Option<QueryNode> = match node.as_rule() {
Rule::multiterm => Some(Self::visit_multiterm(node, default_field)),
Rule::conjunction => {
let inner = node.into_inner().next().unwrap();
match inner.as_rule() {
Rule::AND => {
// If our conjunction is AND and the previous clause was
// just a SHOULD, we make the previous clause a MUST and
// our new clause will also be a MUST
let lastitem = clauses.last_mut().unwrap();
if let LuceneOccur::Should = lastitem.occur {
lastitem.occur = LuceneOccur::Must;
};
}
Rule::AND => (),
Rule::OR => {
// If our conjunction is OR and the previous clause was
// a MUST, we make the previous clause a SHOULD and our
// new clause will also be a SHOULD
let lastitem = clauses.last_mut().unwrap();
if let LuceneOccur::Must = lastitem.occur {
lastitem.occur = LuceneOccur::Should;
};
modifier.get_or_insert(LuceneOccur::Should);
// close the current and_group and create a new one
and_groups.push(group_nodes(and_group, BooleanType::And));
and_group = Vec::new();
}
_ => unreachable!(),
};
Expand All @@ -69,77 +78,42 @@ impl QueryVisitor {
match inner.as_rule() {
Rule::PLUS => (),
Rule::NOT => {
modifier = Some(LuceneOccur::MustNot);
is_not = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to toggle? i.e. is_not = !is_not;

Copy link
Author

@gwenaskell gwenaskell Oct 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like, a query containing "NOT NOT"? I just checked, the lexer does not allow it (it fails on "NOT -foo" and interprets NOT NOT foo as NOT "NOT" foo)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That still makes me nervous, given the prevalence of double-negation in other such expression parsers, but it's good to know it's at least not wrong.

}
_ => unreachable!(),
};
None
}
Rule::clause => {
let query_node = Self::visit_clause(node, default_field);
Some(LuceneClause {
occur: modifier.take().unwrap_or(LuceneOccur::Must),
node: query_node,
})
}
Rule::clause => Some(Self::visit_clause(node, default_field)),
_ => unreachable!(),
};
// If we found a clause to add to our list, add it
if let Some(c) = clause {
clauses.push(c);
}
}
if clauses.len() == 1 {
let single = clauses.pop().unwrap();
match single {
LuceneClause {
occur: LuceneOccur::MustNot,
node: QueryNode::MatchAllDocs,
} => QueryNode::MatchNoDocs,
// I hate Boxing! Every allocation is a personal failing :(
LuceneClause {
occur: LuceneOccur::MustNot,
node,
} => QueryNode::NegatedNode {
node: Box::new(node),
},
LuceneClause { occur: _, node } => node,
}
} else {
let mut and_builder = BooleanBuilder::and();
let mut or_builder = BooleanBuilder::or();
let (mut has_must, mut has_must_not, mut has_should) = (false, false, false);
for c in clauses {
let LuceneClause { node, occur } = c;
match occur {
LuceneOccur::Must => {
and_builder.add_node(node);
has_must = true;
}
LuceneOccur::MustNot => {
and_builder.add_node(QueryNode::NegatedNode {
node: Box::new(node),
});
has_must_not = true;
}
LuceneOccur::Should => {
or_builder.add_node(node);
has_should = true;
}
if let Some(mut n) = query_node {
if is_not {
is_not = false;

n = QueryNode::NegatedNode { node: Box::new(n) }
}

and_group.push(n);
}
if has_must || !has_should {
and_builder.build()
} else if !has_must_not {
or_builder.build()
} else {
and_builder.add_node(or_builder.build());
and_builder.build()
}

and_groups.push(group_nodes(and_group, BooleanType::And));
let query_node = group_nodes(and_groups, BooleanType::Or);

if let QueryNode::NegatedNode { node } = query_node {
// if the node is a negated MatchAllDocs, return MatchNoDocs
if let QueryNode::MatchAllDocs = *node {
return QueryNode::MatchNoDocs;
}
return QueryNode::NegatedNode { node };
}

query_node
}

fn visit_multiterm(token: Pair<Rule>, default_field: &str) -> LuceneClause {
fn visit_multiterm(token: Pair<Rule>, default_field: &str) -> QueryNode {
let contents = token.into_inner();
let mut terms: Vec<String> = Vec::new();
for node in contents {
Expand All @@ -149,12 +123,9 @@ impl QueryVisitor {
_ => unreachable!(),
}
}
LuceneClause {
occur: LuceneOccur::Must,
node: QueryNode::AttributeTerm {
attr: String::from(default_field),
value: terms.join(" "),
},
QueryNode::AttributeTerm {
attr: String::from(default_field),
value: terms.join(" "),
}
}

Expand Down
53 changes: 0 additions & 53 deletions src/datadog/search/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,45 +94,6 @@ pub enum BooleanType {
Or,
}

/// Builder structure to create Boolean QueryNodes. Not strictly necessary,
/// however they're a bit more ergonomic to manipulate than reaching into
/// enums all the time.
pub struct BooleanBuilder {
/// The type of Boolean operation this node will represent.
oper: BooleanType,
/// A list of QueryNodes involved in this boolean operation.
nodes: Vec<QueryNode>,
}

impl BooleanBuilder {
/// Create a BooleanBuilder to produce an AND-type Boolean QueryNode.
pub fn and() -> Self {
Self {
oper: BooleanType::And,
nodes: vec![],
}
}

/// Create a BooleanBuilder to produce an OR-type Boolean QueryNode.
pub fn or() -> Self {
Self {
oper: BooleanType::Or,
nodes: vec![],
}
}

/// Add a QueryNode to this boolean conjunction.
pub fn add_node(&mut self, node: QueryNode) {
self.nodes.push(node);
}

/// Consume this builder and output the finished QueryNode.
pub fn build(self) -> QueryNode {
let Self { oper, nodes } = self;
QueryNode::Boolean { oper, nodes }
}
}

/// QueryNodes represent specific search criteria to be enforced.
#[derive(Clone, Debug, PartialEq)]
pub enum QueryNode {
Expand Down Expand Up @@ -368,20 +329,6 @@ impl Serialize for QueryNode {
}
}

/// Enum representing Lucene's concept of whether a node should occur.
#[derive(Debug)]
pub enum LuceneOccur {
Must,
Should,
MustNot,
}

#[derive(Debug)]
pub struct LuceneClause {
pub occur: LuceneOccur,
pub node: QueryNode,
}

static ESCAPE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new("^\"(.+)\"$").unwrap());

/// Escapes surrounding `"` quotes when distinguishing between quoted terms isn't needed.
Expand Down
Loading
Loading