Skip to content

Commit b388a87

Browse files
committed
chore: enforce find by similarity logic and typesense collection
1 parent fee1339 commit b388a87

File tree

5 files changed

+80
-39
lines changed

5 files changed

+80
-39
lines changed

internal/app/feed.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ type FeedRepository interface {
88
// FindByKeyword returns the feeds matching a keyword search query.
99
FindByKeyword(query string) ([]Feed, error)
1010
// FindBySimilarity returns the feeds similar to a given feed.
11-
FindBySimilarity(doc Feed) ([]Feed, error)
11+
FindBySimilarity(feedID string) ([]Feed, error)
12+
// FindByID returns a document by its ID.
1213
FindByID(id string) (Feed, error)
1314
// Save indexes a document.
1415
Save(doc Feed) error
@@ -21,7 +22,7 @@ type FeedRepository interface {
2122
}
2223

2324
type Feed struct {
24-
ID string `json:"id"`
25+
FeedID string `json:"feed_id"`
2526
Title string `json:"title"`
2627
Link string `json:"link"`
2728
Language string `json:"language"`

internal/repository/typesense/feed.go

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func (f *FeedRepository) FindByKeyword(query string) ([]app.Feed, error) {
4444
}
4545

4646
f := app.Feed{
47-
ID: doc["id"].(string),
47+
FeedID: doc["feedID"].(string),
4848
Title: doc["title"].(string),
4949
Link: doc["link"].(string),
5050
Source: doc["source"].(string),
@@ -60,45 +60,78 @@ func (f *FeedRepository) FindByKeyword(query string) ([]app.Feed, error) {
6060
}
6161

6262
func (f *FeedRepository) FindByID(id string) (app.Feed, error) {
63-
d, err := f.client.Collection("feeds").Document(id).Retrieve(f.ctx)
63+
searchParameters := &api.SearchCollectionParams{
64+
Q: id,
65+
QueryBy: "feedID",
66+
}
67+
searchResult, err := f.client.Collection("feeds").Documents().Search(f.ctx, searchParameters)
6468
if err != nil {
6569
return app.Feed{}, err
6670
}
6771

68-
date, err := time.Parse(time.RFC3339, d["date"].(string))
72+
if searchResult.Hits == nil || len(*searchResult.Hits) == 0 {
73+
return app.Feed{}, fmt.Errorf("feed with id %s not found", id)
74+
}
75+
76+
doc := *(*searchResult.Hits)[0].Document
77+
78+
date, err := time.Parse(time.RFC3339, doc["date"].(string))
6979
if err != nil {
7080
return app.Feed{}, err
7181
}
7282

7383
fStruct := app.Feed{
74-
ID: d["id"].(string),
75-
Title: d["title"].(string),
76-
Link: d["link"].(string),
77-
Source: d["source"].(string),
78-
Language: d["language"].(string),
79-
Summary: d["summary"].(string),
84+
FeedID: doc["feedID"].(string),
85+
Title: doc["title"].(string),
86+
Link: doc["link"].(string),
87+
Source: doc["source"].(string),
88+
Language: doc["language"].(string),
89+
Summary: doc["summary"].(string),
8090
Date: date,
8191
}
8292

8393
return fStruct, nil
8494
}
8595

86-
func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) {
87-
vQ := fmt.Sprintf("title_summary_embedding:([], id: %s, distance_threshold:0.183)", feed.ID)
88-
eF := "title_summary_embedding"
96+
func (f *FeedRepository) FindBySimilarity(feedID string) ([]app.Feed, error) {
97+
feed, err := f.FindByID(feedID)
98+
if err != nil {
99+
return nil, fmt.Errorf("failed to find feed by ID: %w", err)
100+
}
101+
89102
searchParameters := &api.SearchCollectionParams{
90-
Q: feed.Title,
91-
QueryBy: "title,title_summary_embedding",
92-
VectorQuery: &vQ,
93-
ExcludeFields: &eF,
103+
Q: feed.Title + " " + feed.Summary,
104+
QueryBy: "title_summary_embedding",
94105
}
95106
searchResult, err := f.client.Collection("feeds").Documents().Search(f.ctx, searchParameters)
96107
if err != nil {
97108
return nil, err
98109
}
99110

111+
maxVectorDistance := float32(0.16576248) // Define a threshold for vector distance
112+
hits := *searchResult.Hits
113+
n := 0
114+
for _, hit := range hits {
115+
if hit.VectorDistance == nil || *hit.VectorDistance <= maxVectorDistance {
116+
hits[n] = hit
117+
n++
118+
}
119+
}
120+
hits = hits[:n]
121+
searchResult.Hits = &hits
122+
100123
feeds := make([]app.Feed, len(*searchResult.Hits))
101124
for i, x := range *searchResult.Hits {
125+
// If VectorDistance is present, filter by threshold
126+
if x.Document != nil && x.VectorDistance != nil {
127+
doc := *x.Document
128+
title, _ := doc["title"].(string)
129+
fmt.Printf("Title: %s, VectorDistance: %v\n", title, *x.VectorDistance)
130+
}
131+
if x.VectorDistance != nil && *x.VectorDistance > maxVectorDistance {
132+
continue
133+
}
134+
102135
doc := *x.Document
103136

104137
date, err := time.Parse(time.RFC3339, doc["date"].(string))
@@ -107,6 +140,7 @@ func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) {
107140
}
108141

109142
f := app.Feed{
143+
FeedID: doc["feedID"].(string),
110144
Title: doc["title"].(string),
111145
Link: doc["link"].(string),
112146
Source: doc["source"].(string),
@@ -123,7 +157,7 @@ func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) {
123157

124158
func (f *FeedRepository) Save(doc app.Feed) error {
125159
docMap := map[string]interface{}{
126-
"id": generateUniqueID(doc.Link),
160+
"feedID": generateUniqueID(doc.Link),
127161
"title": doc.Title,
128162
"link": doc.Link,
129163
"source": doc.Source,
@@ -147,7 +181,7 @@ func (f *FeedRepository) Update(docs ...app.Feed) error {
147181
for i, doc := range docs {
148182
// Convert app.Feed to map[string]interface{} for updating
149183
docMap := map[string]interface{}{
150-
"id": generateUniqueID(doc.Link),
184+
"feedID": generateUniqueID(doc.Link),
151185
"title": doc.Title,
152186
"link": doc.Link,
153187
"source": doc.Source,

internal/repository/typesense/schema.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ import (
88
func GetFeedSchema(client *typesense.Client) *api.CollectionSchema {
99
schema := &api.CollectionSchema{
1010
Fields: []api.Field{
11+
{
12+
Name: "feedID",
13+
Type: "string",
14+
},
1115
{
1216
Name: "title",
1317
Type: "string",
@@ -51,7 +55,7 @@ func GetFeedSchema(client *typesense.Client) *api.CollectionSchema {
5155
ModelName string "json:\"model_name\""
5256
ProjectId *string "json:\"project_id,omitempty\""
5357
}{
54-
ModelName: "ts/multilingual-e5-large",
58+
ModelName: "ts/e5-small-v2",
5559
},
5660
},
5761
},

internal/webserver/server.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string) error {
8888
fi := make([]api.FeedItem, len(feeds))
8989
for i, f := range feeds {
9090
fi[i] = api.FeedItem{
91-
Id: f.ID,
91+
Id: f.FeedID,
9292
Source: f.Source,
9393
Date: f.Date,
9494
Language: f.Language,
@@ -112,19 +112,19 @@ func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) er
112112
var feeds []app.Feed
113113
var err error
114114

115-
f, err := s.feedRepo.FindByID(feedID)
116-
if err != nil {
117-
e := api.Error{
118-
Code: http.StatusInternalServerError,
119-
Message: "Internal Server Error",
120-
}
115+
// f, err := s.feedRepo.FindByID(feedID)
116+
// if err != nil {
117+
// e := api.Error{
118+
// Code: http.StatusInternalServerError,
119+
// Message: "Internal Server Error",
120+
// }
121121

122-
s.logger.Error("feed search", zap.Error(err))
122+
// s.logger.Error("feed search", zap.Error(err))
123123

124-
return ctx.JSON(http.StatusInternalServerError, e)
125-
}
124+
// return ctx.JSON(http.StatusInternalServerError, e)
125+
// }
126126

127-
feeds, err = s.feedRepo.FindBySimilarity(f)
127+
feeds, err = s.feedRepo.FindBySimilarity(feedID)
128128
if err != nil {
129129
e := api.Error{
130130
Code: http.StatusInternalServerError,
@@ -139,7 +139,7 @@ func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) er
139139
fi := make([]api.FeedItem, len(feeds))
140140
for i, f := range feeds {
141141
fi[i] = api.FeedItem{
142-
Id: f.ID,
142+
Id: f.FeedID,
143143
Source: f.Source,
144144
Date: f.Date,
145145
Language: f.Language,
@@ -156,19 +156,19 @@ func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) er
156156
}
157157
}
158158

159-
fItem := api.FeedItem{
159+
/* fItem := api.FeedItem{
160160
Date: f.Date,
161-
Id: f.ID,
161+
Id: f.FeedID,
162162
Language: f.Language,
163163
Link: f.Link,
164164
Source: f.Source,
165165
Summary: f.Summary,
166166
Title: f.Title,
167167
}
168-
168+
*/
169169
fd := api.FeedDetails{
170170
Similarities: &fi,
171-
Source: &fItem,
171+
//Source: &fItem,
172172
}
173173

174174
return ctx.JSON(http.StatusOK, fd)

internal/x/typesense/schema.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@ func updateCollection(client *typesense.Client, schema *api.CollectionSchema) er
3131

3232
if _, err := client.Collection(schema.Name).Update(context.Background(), u); err != nil {
3333
if strings.Contains(err.Error(), "is already part of the schema") {
34-
// TODO: capture the log of error
35-
return nil
34+
// This error indicates that the field is already part of the schema,
35+
// so we need to delete the collection and recreate it.
36+
_, err := client.Collection(schema.Name).Delete(context.Background())
37+
return err
3638
}
3739

3840
return err

0 commit comments

Comments
 (0)