From 12e3d0fc6943e2708c3aaf08390dd9e0011a01fa Mon Sep 17 00:00:00 2001 From: Luigi Date: Sat, 9 Mar 2024 13:49:44 +0100 Subject: [PATCH 1/3] feat: add by similarity --- Dockerfile | 26 ++++---- Makefile | 5 ++ api/api.gen.go | 50 +++++++++------ api/swagger.yaml | 87 ++++++++++++++------------- internal/app/feed.go | 2 + internal/repository/typesense/feed.go | 36 +++++++++++ internal/webserver/server.go | 11 +++- scripts/deploy.sh | 6 +- 8 files changed, 144 insertions(+), 79 deletions(-) diff --git a/Dockerfile b/Dockerfile index d5d9dda..54407b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,20 +26,20 @@ WORKDIR /data COPY --from=builder /app/main /app/main ARG UNCONDITIONAL_API_SOURCE_REPO -ARG UNCONDITIONAL_API_SOURCE_CLIENT_KEY -ARG UNCONDITIONAL_API_FEED_REPO_HOST -ARG UNCONDITIONAL_API_FEED_REPO_KEY +ARG UNCONDITIONAL_API_FEED_REPO_INDEX ARG UNCONDITIONAL_API_LOG_ENV ENV UNCONDITIONAL_API_SOURCE_REPO=${UNCONDITIONAL_API_SOURCE_REPO} -ENV UNCONDITIONAL_API_SOURCE_CLIENT_KEY=${UNCONDITIONAL_API_SOURCE_CLIENT_KEY} -ENV UNCONDITIONAL_API_FEED_REPO_HOST=${UNCONDITIONAL_API_FEED_REPO_HOST} -ENV UNCONDITIONAL_API_FEED_REPO_KEY=${UNCONDITIONAL_API_FEED_REPO_KEY} +ENV UNCONDITIONAL_API_FEED_REPO_INDEX=${UNCONDITIONAL_API_FEED_REPO_INDEX} ENV UNCONDITIONAL_API_LOG_ENV=${UNCONDITIONAL_API_LOG_ENV} -RUN /app/main index create --name feeds \ - --feed-repo-key="${UNCONDITIONAL_API_FEED_REPO_KEY}" \ - --feed-repo-host="${UNCONDITIONAL_API_FEED_REPO_HOST}" +RUN --mount=type=secret,id=UNCONDITIONAL_API_SOURCE_CLIENT_KEY \ + --mount=type=secret,id=UNCONDITIONAL_API_FEED_REPO_HOST \ + --mount=type=secret,id=UNCONDITIONAL_API_FEED_REPO_KEY \ + UNCONDITIONAL_API_SOURCE_CLIENT_KEY="$(cat /run/secrets/UNCONDITIONAL_API_SOURCE_CLIENT_KEY)" \ + UNCONDITIONAL_API_FEED_REPO_HOST="$(cat /run/secrets/UNCONDITIONAL_API_FEED_REPO_HOST)" \ + UNCONDITIONAL_API_FEED_REPO_KEY="$(cat /run/secrets/UNCONDITIONAL_API_FEED_REPO_KEY)" \ + /app/main index create --name feeds FROM scratch as release COPY --from=certificator /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ @@ -50,18 +50,14 @@ ARG UNCONDITIONAL_API_ADDRESS ARG UNCONDITIONAL_API_ALLOWED_ORIGINS ARG UNCONDITIONAL_API_PORT ARG UNCONDITIONAL_API_SOURCE_REPO -ARG UNCONDITIONAL_API_SOURCE_CLIENT_KEY ARG UNCONDITIONAL_API_LOG_ENV -ARG UNCONDITIONAL_API_FEED_REPO_HOST -ARG UNCONDITIONAL_API_FEED_REPO_KEY +ARG UNCONDITIONAL_API_FEED_REPO_INDEX ENV UNCONDITIONAL_API_ADDRESS=${UNCONDITIONAL_API_ADDRESS} ENV UNCONDITIONAL_API_ALLOWED_ORIGINS=${UNCONDITIONAL_API_ALLOWED_ORIGINS} ENV UNCONDITIONAL_API_PORT=${UNCONDITIONAL_API_PORT} ENV UNCONDITIONAL_API_SOURCE_REPO=${UNCONDITIONAL_API_SOURCE_REPO} -ENV UNCONDITIONAL_API_SOURCE_CLIENT_KEY=${UNCONDITIONAL_API_SOURCE_CLIENT_KEY} ENV UNCONDITIONAL_API_LOG_ENV=${UNCONDITIONAL_API_LOG_ENV} -ENV UNCONDITIONAL_API_FEED_REPO_HOST=${UNCONDITIONAL_API_FEED_REPO_HOST} -ENV UNCONDITIONAL_API_FEED_REPO_KEY=${UNCONDITIONAL_API_FEED_REPO_KEY} +ENV UNCONDITIONAL_API_FEED_REPO_INDEX=${UNCONDITIONAL_API_FEED_REPO_INDEX} ENTRYPOINT ["./app/main","serve", "--address", "0.0.0.0", "--port","8080","--feed-repo-index","feeds"] diff --git a/Makefile b/Makefile index cb0d66d..0aa0799 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,11 @@ test-integration: build: @go build --tags=release -o ${_PROJECT_DIRECTORY}/bin/unconditional-server +.PHONY: deploy + +deploy: + @sh ./scripts/deploy.sh + # Helpers check-variable-%: # detection of undefined variables. @[[ "${${*}}" ]] || (echo '*** Please define variable `${*}` ***' && exit 1) diff --git a/api/api.gen.go b/api/api.gen.go index 9b4e2c4..61ab4e3 100644 --- a/api/api.gen.go +++ b/api/api.gen.go @@ -69,6 +69,11 @@ type SourceReleaseVersion struct { Version string `json:"version"` } +// GetV1SearchFeedQueryParams defines parameters for GetV1SearchFeedQuery. +type GetV1SearchFeedQueryParams struct { + BySimilarity *bool `json:"bySimilarity,omitempty"` +} + // GetV1VersionJSONBody defines parameters for GetV1Version. type GetV1VersionJSONBody map[string]interface{} @@ -82,7 +87,7 @@ type ServerInterface interface { GetV1SearchContextQuery(ctx echo.Context, query string) error // (GET /v1/search/feed/{query}) - GetV1SearchFeedQuery(ctx echo.Context, query string) error + GetV1SearchFeedQuery(ctx echo.Context, query string, params GetV1SearchFeedQueryParams) error // Your GET endpoint // (GET /v1/version) GetV1Version(ctx echo.Context) error @@ -120,8 +125,17 @@ func (w *ServerInterfaceWrapper) GetV1SearchFeedQuery(ctx echo.Context) error { return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter query: %s", err)) } + // Parameter object where we will unmarshal all parameters from the context + var params GetV1SearchFeedQueryParams + // ------------- Optional query parameter "bySimilarity" ------------- + + err = runtime.BindQueryParameter("form", true, false, "bySimilarity", ctx.QueryParams(), ¶ms.BySimilarity) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter bySimilarity: %s", err)) + } + // Invoke the callback with all the unmarshalled arguments - err = w.Handler.GetV1SearchFeedQuery(ctx, query) + err = w.Handler.GetV1SearchFeedQuery(ctx, query, params) return err } @@ -171,22 +185,22 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/9SW246jRhPHX2XU33fJGnwYH7jLJpvVKBdRDrtStJqLhi6gPfTB3QVjPOLdowbG4DGe", - "9aw2UnJliy6q/vWrA/1EYiW0kiDRkvCJ2DgDQZu/H4xRxv3RRmkwyKF5HCsG7hcrDSQkXCKkYEjtEQHW", - "0nR4aNFwmZK69oiBXcENMBJ+ORp6rbN779leRVuIkXhk/86i0jlPM3TuOCMhKdKt2WblAxMgZ43PnwHY", - "nehCnqpEjvmYEo8UJv+6QmfkdU6ulBcLUUW4o6La8aqXhyDO1TGKjbhEGUGRhM2Dd8iFY3KmmD+n+H8D", - "CQnJ//y+Zn5XML9nUXskpzItxkvhkZzLh9EDqwoTj79jCyGoqUbPLqF+gbQ16111Qo5hB6q9FtCV4Pd0", - "sRfRMtJpQZMm7B9ATZz9qCTCHn8CpDy350X4RkivkcgKEUnK8+/B6ZnOEVfvfYDq/uh3POvrEM4ioCs2", - "Xa6XyWreITQlmPcFz9lnMJYrObYJhOB4ntF4DFD8YbZZRgFj2MYoe8fXONgWenp72NLyIZ1TUr/k1qnp", - "3Z6gOcvmOjB6FVUHinZp5wccgLnIJHIxvjarI3JOxu/Vdxur3yEHauH49gsWx5Fq5ZyReBuE2WKnponY", - "zDdRettCGBMxMmAWP2k3yuyHq9tktswSvk5xu4kY/5Y2weVer6CcS23Y9rxNymPqp/KGjMayu/Ibldg4", - "f1yUxXynHSoXnctEDT5J5JOMlWQcuZI0H/RrSKaTwDWC0iCp5iQk80kwmRGPaIpZg9Qvp75t5tyP20H3", - "n3YFmKp2pyk0SlwNqHN/5yR9BPw8PdkNv7kXGq+GCkAwloRfngh3Elwk4hFJhZO66yx7gGgK8Lpbwtgu", - "u3fGVitp2x6YBUG7KiSCbNRRrXMeN/r8rW3r2vt7fWxGFlyDmIGNDdfYYvz1F0fx9jtGbq9CI6HuJIKR", - "NL9p5+rm2XC8OzhTsN6ZdQDRnrXuBgVNANhbqum++P/mUnIEYa+6trg7Un0cMGoMrf5TdX1UNp8lbLHP", - "MjOo62BxXa5lv19cZcDie8WqNyV4upfqbuP9YzM4/H5cKNLwmkT+UoW5+fjhzxuQTCsuL63O3SIvD/FB", - "p3LzmHVp2CZa29PNvZ1kiNqGvk81nxTDRTphtPLLKam9oWno+7mKaZ4pi+E6WAeD8+lsNQkmwWTaHdyP", - "61osHjdFsAhuo9s5kLr+OwAA//+q0t+IMg0AAA==", + "H4sIAAAAAAAC/9SXzW7jNhDHXyVge9RasqPEsW7ddrsIeija7S5QLHKgpJHEiF8mR3KcQO9eUJItOZaz", + "ziJ76CmGOJz58zcfZJ5IooRWEiRaEj0RmxQgaPvzgzHKuB/aKA0GGbSfE5WC+4tbDSQiTCLkYEjjEQHW", + "0ny8aNEwmZOm8YiBdcUMpCT6ujf0Omd33s5exfeQIPHIwzuLSnOWF+jcsZRERGSZvTdlfDNXeNX6/B0g", + "vRV9yEOVyJBPKfFIZfi3FTojr3dypjzK461MCobJ+rEa5CGIY3UpxVZcpoygSKL2wztkwjE5Usx2R/zZ", + "QEYi8pM/5MzvE+YPLBqPcCrzajoVHuFMlpMLVlUmmd5jKyGo2U6unUL9DGlnNrjqhezDjlR7HaAzwZcb", + "2GQiCx/y9SW0YT8BNUnxq5IID/gbIGXcHifhOyG9RKKoRCwp42/BaUdnj2vwPkJ1t/c7ferzEGZFZjiF", + "uFxRW/YITQ3mfcV4+gWMZUpOTQIhGB6faDpGeIP8cllXmypc6jZGPTg+x4HYrB9MfoVBXVskzXNuvZrB", + "7QGao9Oc2dTXeVjbe6uu1eNmBOYkk9jF+FavTsg5aL8X97ZWfwMHamG/+xmLfUt1co5IvA5CKEKZJkwo", + "MEncQZgSMdFgFj9r18rpL2eXiXrM+TZYas4fN/H3lEkeLkotsqsVrGx8XCb1/uiH8saMpk53Hqq1kWyV", + "UFiurbl2wd34lpkaXUnks0yUTBkyJSkf1WtE5rPAFYLSIKlmJCKXs2C2IB7RFIsWqV/Pfdv2uZ90je4/", + "rSsw28at5tAqcTmgzv2tk/QR8Mv8YDb85Ta0Xg0VgGAsib4+EeYkuEjEI5IKJ3XdWw4A0VTg9a+EqVl2", + "54ytVtJ2NbAIgm5USATZqqNac5a0+vx72+V18Pdy20wMuBZxCjYxTGOH8c8/HMWrN4zcPYUmQt1KBCMp", + "v+j66mJnOF0dy+RyFdbXpbnCLOvcjRKaAaSvyaa78V9K5S55fS7j7ScmGKeGoft6lMJYKQ5UvkEOGYKw", + "Z71X3OOo2XcWNYZu/1cJhesKZFEuxGJedk/SH9JTfZ2MBuHp2hjmlQsAFt+rdPsqbodzrukn6A/r6fF9", + "dCL342cX+VdV5uLjh38uQKZaMXlqFKsqAR3WqypeLx77Y9g2Wpea9v8AUiBqG/k+1WxWjQfzLKVbv56T", + "xhubRr7PVUJ5oSxGN8FNMFqfL5azYBbM5v3C3Yn7KSjFKriXsQpZQJrmvwAAAP//A98fT4INAAA=", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/api/swagger.yaml b/api/swagger.yaml index d4de152..aeb301f 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -1,57 +1,62 @@ openapi: 3.0.2 x-stoplight: - id: 44w9u0405b53e + id: g0km90jnbo4i0 info: title: Unconditional - version: "1.0" + version: '1.0' servers: - - url: "https://api.unconditional.day/v1" - - url: "http://localhost:8080" - - url: "127.0.0.1:8080" + - url: 'https://api.unconditional.day/v1' + - url: 'http://localhost:8080' + - url: '127.0.0.1:8080' paths: - "/v1/search/feed/{query}": + '/v1/search/feed/{query}': get: responses: - "200": + '200': description: OK content: application/json: schema: type: array items: - $ref: "#/components/schemas/FeedItem" - "500": + $ref: '#/components/schemas/FeedItem' + '500': description: Internal Server Error content: application/json: schema: type: object - $ref: "#/components/schemas/Error" - parameters: - - name: query - in: path - required: true - schema: - type: string + $ref: '#/components/schemas/Error' x-stoplight: - id: wosl2fd4xhhrd - "/v1/search/context/{query}": + id: e6uenhk2m21k5 + parameters: + - schema: + type: boolean + in: query + name: bySimilarity + parameters: + - schema: + type: string + name: query + in: path + required: true + '/v1/search/context/{query}': get: responses: - "200": + '200': description: OK content: application/json: schema: type: object - $ref: "#/components/schemas/SearchContextDetails" - "500": + $ref: '#/components/schemas/SearchContextDetails' + '500': description: Internal Server Error content: application/json: schema: type: object - $ref: "#/components/schemas/Error" + $ref: '#/components/schemas/Error' parameters: - name: query in: path @@ -59,22 +64,22 @@ paths: schema: type: string x-stoplight: - id: idoe8qr80ebxd - "/v1/version": + id: 7c394v6kr5tff + /v1/version: get: summary: Your GET endpoint tags: [] responses: - "200": + '200': description: OK content: application/json: schema: type: object - $ref: "#/components/schemas/ServerVersion" + $ref: '#/components/schemas/ServerVersion' operationId: get-v1-version x-stoplight: - id: q4lvzczpgn9wh + id: oucep4v9ubq2z requestBody: content: application/json: @@ -98,7 +103,7 @@ components: type: string image: type: object - $ref: "#/components/schemas/FeedImage" + $ref: '#/components/schemas/FeedImage' date: type: string format: date-time @@ -110,7 +115,7 @@ components: - language - date x-stoplight: - id: xa4xmb6bpguaf + id: kwewfmf4xgq3e FeedImage: type: object properties: @@ -122,11 +127,11 @@ components: - url - title x-stoplight: - id: cmmybtqamyqiy + id: albynchitcqzu SearchContextDetails: type: object x-stoplight: - id: 2bea7d1686f73 + id: fhfrlaebk9ask properties: title: type: string @@ -156,51 +161,51 @@ components: - message - code x-stoplight: - id: ugjrjhvkdmen2 + id: mffsjrkb81ot5 ServerVersion: title: ServerVersion x-stoplight: - id: 24qo1fm939bg5 + id: 4m4ndcimoercb type: object properties: source: - $ref: "#/components/schemas/SourceReleaseVersion" + $ref: '#/components/schemas/SourceReleaseVersion' build: - $ref: "#/components/schemas/ServerBuildVersion" + $ref: '#/components/schemas/ServerBuildVersion' required: - source - build SourceReleaseVersion: title: SourceReleaseVersion x-stoplight: - id: ufsclw4vu3qp5 + id: qrni9cae7qsr6 type: object properties: version: type: string x-stoplight: - id: t6xp7ev3nprdj + id: g42kpmf59e9sb lastUpdatedAt: type: string x-stoplight: - id: 26hfi8gtj9bdi + id: ozgly07pllzwb required: - version - lastUpdatedAt ServerBuildVersion: title: ServerBuildVersion x-stoplight: - id: p7byzats6s3zt + id: a6g4vsjso6ozw type: object properties: commit: type: string x-stoplight: - id: eoik296b0ddt3 + id: 48tl37vuwu47p version: type: string x-stoplight: - id: jup15zjavkg3a + id: mwqxrg5t0vvst required: - commit - version diff --git a/internal/app/feed.go b/internal/app/feed.go index 8ec1116..3a32757 100644 --- a/internal/app/feed.go +++ b/internal/app/feed.go @@ -7,6 +7,8 @@ import ( type FeedRepository interface { // Search returns the results of a search query. Find(query string) ([]Feed, error) + // Search returns the results of a search query by similarity. + FindBySimilarity(query string) ([]Feed, error) // Index indexes a document. Save(doc Feed) error // Update a document in index. diff --git a/internal/repository/typesense/feed.go b/internal/repository/typesense/feed.go index 533722e..fdec117 100644 --- a/internal/repository/typesense/feed.go +++ b/internal/repository/typesense/feed.go @@ -55,6 +55,42 @@ func (f *FeedRepository) Find(query string) ([]app.Feed, error) { return feeds, nil } + +func (f *FeedRepository) FindBySimilarity(query string) ([]app.Feed, error){ + searchParameters := &api.SearchCollectionParams{ + Q: query, + QueryBy: "title_summary_embedding", + } + searchResult, err := f.client.Collection("feeds").Documents().Search(f.ctx, searchParameters) + if err != nil { + return nil, err + } + + feeds := make([]app.Feed, len(*searchResult.Hits)) + for i, x := range *searchResult.Hits { + doc := *x.Document + + date, err := time.Parse(time.RFC3339, doc["date"].(string)) + if err != nil { + return nil, err + } + + f := app.Feed{ + Title: doc["title"].(string), + Link: doc["link"].(string), + Source: doc["source"].(string), + Language: doc["language"].(string), + Summary: doc["summary"].(string), + Date: date, + } + + feeds[i] = f + } + + return feeds, nil +} + + func (f *FeedRepository) Save(doc app.Feed) error { docMap := map[string]interface{}{ "id": doc.Link, diff --git a/internal/webserver/server.go b/internal/webserver/server.go index d5dc853..c58362c 100644 --- a/internal/webserver/server.go +++ b/internal/webserver/server.go @@ -67,8 +67,15 @@ func (s *Server) Start() error { } // (GET /v1/search/feed/{query}) -func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string) error { - feeds, err := s.feedRepo.Find(query) +func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string, params api.GetV1SearchFeedQueryParams) error { + var feeds []app.Feed + var err error + + if params.BySimilarity != nil && *params.BySimilarity { + feeds, err = s.feedRepo.FindBySimilarity(query) + } else { + feeds, err = s.feedRepo.Find(query) + } if err != nil { e := api.Error{ Code: http.StatusInternalServerError, diff --git a/scripts/deploy.sh b/scripts/deploy.sh index eaddca4..f17668e 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -9,9 +9,9 @@ fi # Deploy flyctl deploy --remote-only \ - --build-arg UNCONDITIONAL_API_SOURCE_CLIENT_KEY="$UNCONDITIONAL_API_SOURCE_CLIENT_KEY" \ - --build-arg UNCONDITIONAL_API_FEED_REPO_KEY="$UNCONDITIONAL_API_FEED_REPO_KEY" \ - --build-arg UNCONDITIONAL_API_FEED_REPO_HOST="$UNCONDITIONAL_API_FEED_REPO_HOST" \ + --build-secret UNCONDITIONAL_API_SOURCE_CLIENT_KEY="$UNCONDITIONAL_API_SOURCE_CLIENT_KEY" \ + --build-secret UNCONDITIONAL_API_FEED_REPO_KEY="$UNCONDITIONAL_API_FEED_REPO_KEY" \ + --build-secret UNCONDITIONAL_API_FEED_REPO_HOST="$UNCONDITIONAL_API_FEED_REPO_HOST" \ --build-arg UNCONDITIONAL_API_FEED_REPO_INDEX="$UNCONDITIONAL_API_FEED_REPO_INDEX" \ --build-arg UNCONDITIONAL_API_BUILD_COMMIT_VERSION="$UNCONDITIONAL_API_BUILD_COMMIT_VERSION" \ --build-arg UNCONDITIONAL_API_BUILD_RELEASE_VERSION="$UNCONDITIONAL_API_BUILD_RELEASE_VERSION" From fee1339a0a4df00ae759786b060ccf8593011bf6 Mon Sep 17 00:00:00 2001 From: Luigi Date: Fri, 12 Apr 2024 12:14:10 +0200 Subject: [PATCH 2/3] feat: use identifier for the feed, change llm, update swagger --- api/api.gen.go | 81 +++++++++++++++---------- api/swagger.yaml | 72 +++++++++++++++++----- internal/app/feed.go | 6 +- internal/repository/typesense/feed.go | 54 ++++++++++++++--- internal/repository/typesense/schema.go | 2 +- internal/webserver/server.go | 77 +++++++++++++++++++++-- 6 files changed, 230 insertions(+), 62 deletions(-) diff --git a/api/api.gen.go b/api/api.gen.go index 61ab4e3..a5dedbe 100644 --- a/api/api.gen.go +++ b/api/api.gen.go @@ -25,6 +25,12 @@ type Error struct { Message string `json:"message"` } +// FeedDetails defines model for FeedDetails. +type FeedDetails struct { + Similarities *[]FeedItem `json:"similarities,omitempty"` + Source *FeedItem `json:"source,omitempty"` +} + // FeedImage defines model for FeedImage. type FeedImage struct { Title string `json:"title"` @@ -33,7 +39,9 @@ type FeedImage struct { // FeedItem defines model for FeedItem. type FeedItem struct { + *string `json:",omitempty"` Date time.Time `json:"date"` + Id string `json:"id"` Image *FeedImage `json:"image,omitempty"` Language string `json:"language"` Link string `json:"link"` @@ -69,11 +77,6 @@ type SourceReleaseVersion struct { Version string `json:"version"` } -// GetV1SearchFeedQueryParams defines parameters for GetV1SearchFeedQuery. -type GetV1SearchFeedQueryParams struct { - BySimilarity *bool `json:"bySimilarity,omitempty"` -} - // GetV1VersionJSONBody defines parameters for GetV1Version. type GetV1VersionJSONBody map[string]interface{} @@ -85,9 +88,12 @@ type ServerInterface interface { // (GET /v1/search/context/{query}) GetV1SearchContextQuery(ctx echo.Context, query string) error + // Your GET endpoint + // (GET /v1/search/feed/similarities/{feedID}) + GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) error // (GET /v1/search/feed/{query}) - GetV1SearchFeedQuery(ctx echo.Context, query string, params GetV1SearchFeedQueryParams) error + GetV1SearchFeedQuery(ctx echo.Context, query string) error // Your GET endpoint // (GET /v1/version) GetV1Version(ctx echo.Context) error @@ -114,6 +120,22 @@ func (w *ServerInterfaceWrapper) GetV1SearchContextQuery(ctx echo.Context) error return err } +// GetV1SearchFeedSimilarities converts echo context to params. +func (w *ServerInterfaceWrapper) GetV1SearchFeedSimilarities(ctx echo.Context) error { + var err error + // ------------- Path parameter "feedID" ------------- + var feedID string + + err = runtime.BindStyledParameterWithLocation("simple", false, "feedID", runtime.ParamLocationPath, ctx.Param("feedID"), &feedID) + if err != nil { + return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter feedID: %s", err)) + } + + // Invoke the callback with all the unmarshalled arguments + err = w.Handler.GetV1SearchFeedSimilarities(ctx, feedID) + return err +} + // GetV1SearchFeedQuery converts echo context to params. func (w *ServerInterfaceWrapper) GetV1SearchFeedQuery(ctx echo.Context) error { var err error @@ -125,17 +147,8 @@ func (w *ServerInterfaceWrapper) GetV1SearchFeedQuery(ctx echo.Context) error { return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter query: %s", err)) } - // Parameter object where we will unmarshal all parameters from the context - var params GetV1SearchFeedQueryParams - // ------------- Optional query parameter "bySimilarity" ------------- - - err = runtime.BindQueryParameter("form", true, false, "bySimilarity", ctx.QueryParams(), ¶ms.BySimilarity) - if err != nil { - return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("Invalid format for parameter bySimilarity: %s", err)) - } - // Invoke the callback with all the unmarshalled arguments - err = w.Handler.GetV1SearchFeedQuery(ctx, query, params) + err = w.Handler.GetV1SearchFeedQuery(ctx, query) return err } @@ -177,6 +190,7 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL } router.GET(baseURL+"/v1/search/context/:query", wrapper.GetV1SearchContextQuery) + router.GET(baseURL+"/v1/search/feed/similarities/:feedID", wrapper.GetV1SearchFeedSimilarities) router.GET(baseURL+"/v1/search/feed/:query", wrapper.GetV1SearchFeedQuery) router.GET(baseURL+"/v1/version", wrapper.GetV1Version) @@ -185,22 +199,25 @@ func RegisterHandlersWithBaseURL(router EchoRouter, si ServerInterface, baseURL // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/9SXzW7jNhDHXyVge9RasqPEsW7ddrsIeija7S5QLHKgpJHEiF8mR3KcQO9eUJItOZaz", - "ziJ76CmGOJz58zcfZJ5IooRWEiRaEj0RmxQgaPvzgzHKuB/aKA0GGbSfE5WC+4tbDSQiTCLkYEjjEQHW", - "0ny8aNEwmZOm8YiBdcUMpCT6ujf0Omd33s5exfeQIPHIwzuLSnOWF+jcsZRERGSZvTdlfDNXeNX6/B0g", - "vRV9yEOVyJBPKfFIZfi3FTojr3dypjzK461MCobJ+rEa5CGIY3UpxVZcpoygSKL2wztkwjE5Usx2R/zZ", - "QEYi8pM/5MzvE+YPLBqPcCrzajoVHuFMlpMLVlUmmd5jKyGo2U6unUL9DGlnNrjqhezDjlR7HaAzwZcb", - "2GQiCx/y9SW0YT8BNUnxq5IID/gbIGXcHifhOyG9RKKoRCwp42/BaUdnj2vwPkJ1t/c7ferzEGZFZjiF", - "uFxRW/YITQ3mfcV4+gWMZUpOTQIhGB6faDpGeIP8cllXmypc6jZGPTg+x4HYrB9MfoVBXVskzXNuvZrB", - "7QGao9Oc2dTXeVjbe6uu1eNmBOYkk9jF+FavTsg5aL8X97ZWfwMHamG/+xmLfUt1co5IvA5CKEKZJkwo", - "MEncQZgSMdFgFj9r18rpL2eXiXrM+TZYas4fN/H3lEkeLkotsqsVrGx8XCb1/uiH8saMpk53Hqq1kWyV", - "UFiurbl2wd34lpkaXUnks0yUTBkyJSkf1WtE5rPAFYLSIKlmJCKXs2C2IB7RFIsWqV/Pfdv2uZ90je4/", - "rSsw28at5tAqcTmgzv2tk/QR8Mv8YDb85Ta0Xg0VgGAsib4+EeYkuEjEI5IKJ3XdWw4A0VTg9a+EqVl2", - "54ytVtJ2NbAIgm5USATZqqNac5a0+vx72+V18Pdy20wMuBZxCjYxTGOH8c8/HMWrN4zcPYUmQt1KBCMp", - "v+j66mJnOF0dy+RyFdbXpbnCLOvcjRKaAaSvyaa78V9K5S55fS7j7ScmGKeGoft6lMJYKQ5UvkEOGYKw", - "Z71X3OOo2XcWNYZu/1cJhesKZFEuxGJedk/SH9JTfZ2MBuHp2hjmlQsAFt+rdPsqbodzrukn6A/r6fF9", - "dCL342cX+VdV5uLjh38uQKZaMXlqFKsqAR3WqypeLx77Y9g2Wpea9v8AUiBqG/k+1WxWjQfzLKVbv56T", - "xhubRr7PVUJ5oSxGN8FNMFqfL5azYBbM5v3C3Yn7KSjFKriXsQpZQJrmvwAAAP//A98fT4INAAA=", + "H4sIAAAAAAAC/9RX33ObRhD+VzzXPhKBJCRZvDVNmvH0oZOkyUwn44cDFnHS/YC7Axl59L937kCAIhRj", + "u3nokzG32v32+3b3lkcUCZYJDlwrFDwiFaXAsH18L6WQ5iGTIgOpCdjXkYjB/NVVBihAhGvYgERHBzFQ", + "Cm/6h0pLwjfoeHSQhLwgEmIUfGsNndrZvXOyF+EWIo0c9PBGaZFRskm1cUdiFCCWJGord+HtVOiF9fkH", + "QPwONCZUXeJUhBGKJTn9TzQw+/CrhAQF6Be3y9xt0naNxzsNzKTTYMJS4uoKJE/NSLxmaX6oyrWFpEQh", + "IxgfxsQhmppA/WzGMbKNE1zu/HyZx0vRMnLHGhHO+WjCXGjjoELSpzUzRiesIwXDNKx4lBId5Yeig2fy", + "vkB3CWDY57I4pFTTRTLFVQ0yxtqmlQjJsEaBffFGEwYdi12uxsm4SMSf4pWcL6b7glQ2EjkR+6S01vDo", + "IIr5phhuCQdRwneDB10NXR4VjGFZDZ5dE/g7IUnc6tj5a9C0sXvQG4ZHar7bwz5hif+wyedgY38GLKP0", + "d8E1POir3fpCpn5ER1qwkGNCX0PWiacTOy1dnfceVfddMw9mPY7CJE0kxRDu1ljtGgplCfJtQWj8FaQi", + "gg+NZcaIHlvc/q2m81VZ7At/ldkYZed4jAO2zx/kZqG9slQaHb/nrUHTuT2j5iKbkfNkufFLtVViKQ77", + "HjFXOQlNjKcadgDO6Dn+2Vp9AgpYQfvr77hoW6qGc8HE80jwmc/jiDABMgprEoZADDSY0l8y08rxb6PL", + "RBw2tPJWGaWHffiSMtn4s13GksUa1iq8LJOyTf0cXp+joezGUZVLTtYRhlWu5NIENzOcJ6J3G6IvPBI8", + "JpoIjmmvXgM0nXimEEQGHGcEBWg+8SYz5KAM69RS6pZTV9k+d6O60d3HvABZHc3pBiwSowE27u8MpA+g", + "v07PZsNH8wPrVWIGGqRCwbdHRAwEEwk5iGNmoOaNZUeglgU4zco2NMvujbHKBFd1Dcw8rx4VXAO36HCW", + "URJZfO5W1bp2/n7cNgMDzlIcg4okyXRN419/GhYX/2Hkei8dCHXHNUiO6U3dVzcnw+HqWEXztV8ud3Kh", + "k6R21xM0AYjd/grpPppXd+9GaWuWgM/9/fMnCtFfG6/y79cBz4/e4vjmE+QFmAneajSW0/buRf+IQt58", + "eP/3DfA4E4Rf68eouA3hgZXzfF8urY+ni74m/XlVP6DkM/rS8PmxbbVXiPa6D47/UyvBsgCe7mZsNt0t", + "Rgr7gmnW6Nq7gq5r2d0Usq7wtyKunsXb+Q1zbO6unzZN+5vAFe1f1HSiiCDzy3UR5rNDk4ay0Wpp7Mcf", + "SrXOVOC6OCOTon8lTmJcueUUHZ2+aeC6VESYpkLp4Na79Xrn09lq4k28ybQ5uL+yGXg7tva2PBQ+8dDx", + "+G8AAAD//4T3I/yJEAAA", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/api/swagger.yaml b/api/swagger.yaml index aeb301f..73d57b0 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -29,11 +29,7 @@ paths: $ref: '#/components/schemas/Error' x-stoplight: id: e6uenhk2m21k5 - parameters: - - schema: - type: boolean - in: query - name: bySimilarity + parameters: [] parameters: - schema: type: string @@ -86,11 +82,50 @@ paths: schema: type: object properties: {} + '/v1/search/feed/similarities/{feedID}': + get: + summary: Your GET endpoint + tags: [] + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/FeedDetails' + '400': + description: Bad Request + '500': + description: Internal Server Error + operationId: get-v1-search-feed-similarities + x-stoplight: + id: cu8bexmv3qwv6 + parameters: [] + parameters: + - schema: + type: string + name: feedID + in: path + required: true components: schemas: FeedItem: type: object + x-stoplight: + id: kwewfmf4xgq3e + required: + - id + - title + - summary + - link + - source + - language + - date properties: + id: + type: string + x-stoplight: + id: i41a7r351wuiy title: type: string summary: @@ -107,15 +142,10 @@ components: date: type: string format: date-time - required: - - title - - summary - - link - - source - - language - - date - x-stoplight: - id: kwewfmf4xgq3e + '': + type: string + x-stoplight: + id: 6uzhltl5f1ayg FeedImage: type: object properties: @@ -209,3 +239,17 @@ components: required: - commit - version + FeedDetails: + title: FeedDetails + x-stoplight: + id: jdfavk4q6qd6o + type: object + properties: + source: + $ref: '#/components/schemas/FeedItem' + similarities: + type: array + x-stoplight: + id: 0s2id9mhqzyv9 + items: + $ref: '#/components/schemas/FeedItem' diff --git a/internal/app/feed.go b/internal/app/feed.go index 3a32757..4033fec 100644 --- a/internal/app/feed.go +++ b/internal/app/feed.go @@ -6,9 +6,10 @@ import ( type FeedRepository interface { // Search returns the results of a search query. - Find(query string) ([]Feed, error) + FindByKeyword(query string) ([]Feed, error) // Search returns the results of a search query by similarity. - FindBySimilarity(query string) ([]Feed, error) + FindBySimilarity(doc Feed) ([]Feed, error) + FindByID(id string) (Feed, error) // Index indexes a document. Save(doc Feed) error // Update a document in index. @@ -20,6 +21,7 @@ type FeedRepository interface { } type Feed struct { + ID string `json:"id"` Title string `json:"title"` Link string `json:"link"` Language string `json:"language"` diff --git a/internal/repository/typesense/feed.go b/internal/repository/typesense/feed.go index fdec117..3ebbd21 100644 --- a/internal/repository/typesense/feed.go +++ b/internal/repository/typesense/feed.go @@ -2,6 +2,9 @@ package typesense import ( "context" + "crypto/sha256" + "encoding/hex" + "fmt" "time" "github.com/typesense/typesense-go/typesense" @@ -21,7 +24,7 @@ func NewFeedRepository(client *typesense.Client) *FeedRepository { } } -func (f *FeedRepository) Find(query string) ([]app.Feed, error) { +func (f *FeedRepository) FindByKeyword(query string) ([]app.Feed, error) { searchParameters := &api.SearchCollectionParams{ Q: query, QueryBy: "title, summary", @@ -41,6 +44,7 @@ func (f *FeedRepository) Find(query string) ([]app.Feed, error) { } f := app.Feed{ + ID: doc["id"].(string), Title: doc["title"].(string), Link: doc["link"].(string), Source: doc["source"].(string), @@ -55,11 +59,38 @@ func (f *FeedRepository) Find(query string) ([]app.Feed, error) { return feeds, nil } +func (f *FeedRepository) FindByID(id string) (app.Feed, error) { + d, err := f.client.Collection("feeds").Document(id).Retrieve(f.ctx) + if err != nil { + return app.Feed{}, err + } + + date, err := time.Parse(time.RFC3339, d["date"].(string)) + if err != nil { + return app.Feed{}, err + } + + fStruct := app.Feed{ + ID: d["id"].(string), + Title: d["title"].(string), + Link: d["link"].(string), + Source: d["source"].(string), + Language: d["language"].(string), + Summary: d["summary"].(string), + Date: date, + } -func (f *FeedRepository) FindBySimilarity(query string) ([]app.Feed, error){ + return fStruct, nil +} + +func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) { + vQ := fmt.Sprintf("title_summary_embedding:([], id: %s, distance_threshold:0.183)", feed.ID) + eF := "title_summary_embedding" searchParameters := &api.SearchCollectionParams{ - Q: query, - QueryBy: "title_summary_embedding", + Q: feed.Title, + QueryBy: "title,title_summary_embedding", + VectorQuery: &vQ, + ExcludeFields: &eF, } searchResult, err := f.client.Collection("feeds").Documents().Search(f.ctx, searchParameters) if err != nil { @@ -90,10 +121,9 @@ func (f *FeedRepository) FindBySimilarity(query string) ([]app.Feed, error){ return feeds, nil } - func (f *FeedRepository) Save(doc app.Feed) error { docMap := map[string]interface{}{ - "id": doc.Link, + "id": generateUniqueID(doc.Link), "title": doc.Title, "link": doc.Link, "source": doc.Source, @@ -117,7 +147,7 @@ func (f *FeedRepository) Update(docs ...app.Feed) error { for i, doc := range docs { // Convert app.Feed to map[string]interface{} for updating docMap := map[string]interface{}{ - "id": doc.Link, + "id": generateUniqueID(doc.Link), "title": doc.Title, "link": doc.Link, "source": doc.Source, @@ -160,3 +190,13 @@ func (f *FeedRepository) Delete(doc app.Feed) error { return nil } + +func generateUniqueID(link string) string { + hash := sha256.New() + hash.Write([]byte(link)) + hashBytes := hash.Sum(nil) + + uniqueID := hex.EncodeToString(hashBytes) + + return uniqueID +} diff --git a/internal/repository/typesense/schema.go b/internal/repository/typesense/schema.go index 2172b5e..946481e 100644 --- a/internal/repository/typesense/schema.go +++ b/internal/repository/typesense/schema.go @@ -51,7 +51,7 @@ func GetFeedSchema(client *typesense.Client) *api.CollectionSchema { ModelName string "json:\"model_name\"" ProjectId *string "json:\"project_id,omitempty\"" }{ - ModelName: "ts/all-MiniLM-L12-v2", + ModelName: "ts/multilingual-e5-large", }, }, }, diff --git a/internal/webserver/server.go b/internal/webserver/server.go index c58362c..4750a3e 100644 --- a/internal/webserver/server.go +++ b/internal/webserver/server.go @@ -67,15 +67,11 @@ func (s *Server) Start() error { } // (GET /v1/search/feed/{query}) -func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string, params api.GetV1SearchFeedQueryParams) error { +func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string) error { var feeds []app.Feed var err error - if params.BySimilarity != nil && *params.BySimilarity { - feeds, err = s.feedRepo.FindBySimilarity(query) - } else { - feeds, err = s.feedRepo.Find(query) - } + feeds, err = s.feedRepo.FindByKeyword(query) if err != nil { e := api.Error{ Code: http.StatusInternalServerError, @@ -87,9 +83,12 @@ func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string, params api return ctx.JSON(http.StatusInternalServerError, e) } + fmt.Println(feeds) + fi := make([]api.FeedItem, len(feeds)) for i, f := range feeds { fi[i] = api.FeedItem{ + Id: f.ID, Source: f.Source, Date: f.Date, Language: f.Language, @@ -109,6 +108,72 @@ func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string, params api return ctx.JSON(http.StatusOK, fi) } +func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) error { + var feeds []app.Feed + var err error + + f, err := s.feedRepo.FindByID(feedID) + if err != nil { + e := api.Error{ + Code: http.StatusInternalServerError, + Message: "Internal Server Error", + } + + s.logger.Error("feed search", zap.Error(err)) + + return ctx.JSON(http.StatusInternalServerError, e) + } + + feeds, err = s.feedRepo.FindBySimilarity(f) + if err != nil { + e := api.Error{ + Code: http.StatusInternalServerError, + Message: "Internal Server Error", + } + + s.logger.Error("feed search", zap.Error(err)) + + return ctx.JSON(http.StatusInternalServerError, e) + } + + fi := make([]api.FeedItem, len(feeds)) + for i, f := range feeds { + fi[i] = api.FeedItem{ + Id: f.ID, + Source: f.Source, + Date: f.Date, + Language: f.Language, + Link: f.Link, + Summary: f.Summary, + Title: f.Title, + } + + if f.Image != nil { + fi[i].Image = &api.FeedImage{ + Title: f.Image.Title, + Url: f.Image.URL, + } + } + } + + fItem := api.FeedItem{ + Date: f.Date, + Id: f.ID, + Language: f.Language, + Link: f.Link, + Source: f.Source, + Summary: f.Summary, + Title: f.Title, + } + + fd := api.FeedDetails{ + Similarities: &fi, + Source: &fItem, + } + + return ctx.JSON(http.StatusOK, fd) +} + func (s *Server) GetV1Version(ctx echo.Context) error { v := api.ServerVersion{ Build: api.ServerBuildVersion{ From b388a876dbb904dfdb9cb69279de4d69f2d11857 Mon Sep 17 00:00:00 2001 From: Luigi Date: Sat, 16 Aug 2025 22:58:21 +0200 Subject: [PATCH 3/3] chore: enforce find by similarity logic and typesense collection --- internal/app/feed.go | 5 +- internal/repository/typesense/feed.go | 70 ++++++++++++++++++------- internal/repository/typesense/schema.go | 6 ++- internal/webserver/server.go | 32 +++++------ internal/x/typesense/schema.go | 6 ++- 5 files changed, 80 insertions(+), 39 deletions(-) diff --git a/internal/app/feed.go b/internal/app/feed.go index 4033fec..400acb4 100644 --- a/internal/app/feed.go +++ b/internal/app/feed.go @@ -8,7 +8,8 @@ type FeedRepository interface { // Search returns the results of a search query. FindByKeyword(query string) ([]Feed, error) // Search returns the results of a search query by similarity. - FindBySimilarity(doc Feed) ([]Feed, error) + FindBySimilarity(feedID string) ([]Feed, error) + // FindByID returns a document by its ID. FindByID(id string) (Feed, error) // Index indexes a document. Save(doc Feed) error @@ -21,7 +22,7 @@ type FeedRepository interface { } type Feed struct { - ID string `json:"id"` + FeedID string `json:"feed_id"` Title string `json:"title"` Link string `json:"link"` Language string `json:"language"` diff --git a/internal/repository/typesense/feed.go b/internal/repository/typesense/feed.go index 3ebbd21..4dc2c8e 100644 --- a/internal/repository/typesense/feed.go +++ b/internal/repository/typesense/feed.go @@ -44,7 +44,7 @@ func (f *FeedRepository) FindByKeyword(query string) ([]app.Feed, error) { } f := app.Feed{ - ID: doc["id"].(string), + FeedID: doc["feedID"].(string), Title: doc["title"].(string), Link: doc["link"].(string), Source: doc["source"].(string), @@ -60,45 +60,78 @@ func (f *FeedRepository) FindByKeyword(query string) ([]app.Feed, error) { } func (f *FeedRepository) FindByID(id string) (app.Feed, error) { - d, err := f.client.Collection("feeds").Document(id).Retrieve(f.ctx) + searchParameters := &api.SearchCollectionParams{ + Q: id, + QueryBy: "feedID", + } + searchResult, err := f.client.Collection("feeds").Documents().Search(f.ctx, searchParameters) if err != nil { return app.Feed{}, err } - date, err := time.Parse(time.RFC3339, d["date"].(string)) + if searchResult.Hits == nil || len(*searchResult.Hits) == 0 { + return app.Feed{}, fmt.Errorf("feed with id %s not found", id) + } + + doc := *(*searchResult.Hits)[0].Document + + date, err := time.Parse(time.RFC3339, doc["date"].(string)) if err != nil { return app.Feed{}, err } fStruct := app.Feed{ - ID: d["id"].(string), - Title: d["title"].(string), - Link: d["link"].(string), - Source: d["source"].(string), - Language: d["language"].(string), - Summary: d["summary"].(string), + FeedID: doc["feedID"].(string), + Title: doc["title"].(string), + Link: doc["link"].(string), + Source: doc["source"].(string), + Language: doc["language"].(string), + Summary: doc["summary"].(string), Date: date, } return fStruct, nil } -func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) { - vQ := fmt.Sprintf("title_summary_embedding:([], id: %s, distance_threshold:0.183)", feed.ID) - eF := "title_summary_embedding" +func (f *FeedRepository) FindBySimilarity(feedID string) ([]app.Feed, error) { + feed, err := f.FindByID(feedID) + if err != nil { + return nil, fmt.Errorf("failed to find feed by ID: %w", err) + } + searchParameters := &api.SearchCollectionParams{ - Q: feed.Title, - QueryBy: "title,title_summary_embedding", - VectorQuery: &vQ, - ExcludeFields: &eF, + Q: feed.Title + " " + feed.Summary, + QueryBy: "title_summary_embedding", } searchResult, err := f.client.Collection("feeds").Documents().Search(f.ctx, searchParameters) if err != nil { return nil, err } + maxVectorDistance := float32(0.16576248) // Define a threshold for vector distance + hits := *searchResult.Hits + n := 0 + for _, hit := range hits { + if hit.VectorDistance == nil || *hit.VectorDistance <= maxVectorDistance { + hits[n] = hit + n++ + } + } + hits = hits[:n] + searchResult.Hits = &hits + feeds := make([]app.Feed, len(*searchResult.Hits)) for i, x := range *searchResult.Hits { + // If VectorDistance is present, filter by threshold + if x.Document != nil && x.VectorDistance != nil { + doc := *x.Document + title, _ := doc["title"].(string) + fmt.Printf("Title: %s, VectorDistance: %v\n", title, *x.VectorDistance) + } + if x.VectorDistance != nil && *x.VectorDistance > maxVectorDistance { + continue + } + doc := *x.Document date, err := time.Parse(time.RFC3339, doc["date"].(string)) @@ -107,6 +140,7 @@ func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) { } f := app.Feed{ + FeedID: doc["feedID"].(string), Title: doc["title"].(string), Link: doc["link"].(string), Source: doc["source"].(string), @@ -123,7 +157,7 @@ func (f *FeedRepository) FindBySimilarity(feed app.Feed) ([]app.Feed, error) { func (f *FeedRepository) Save(doc app.Feed) error { docMap := map[string]interface{}{ - "id": generateUniqueID(doc.Link), + "feedID": generateUniqueID(doc.Link), "title": doc.Title, "link": doc.Link, "source": doc.Source, @@ -147,7 +181,7 @@ func (f *FeedRepository) Update(docs ...app.Feed) error { for i, doc := range docs { // Convert app.Feed to map[string]interface{} for updating docMap := map[string]interface{}{ - "id": generateUniqueID(doc.Link), + "feedID": generateUniqueID(doc.Link), "title": doc.Title, "link": doc.Link, "source": doc.Source, diff --git a/internal/repository/typesense/schema.go b/internal/repository/typesense/schema.go index 946481e..e7bdd97 100644 --- a/internal/repository/typesense/schema.go +++ b/internal/repository/typesense/schema.go @@ -8,6 +8,10 @@ import ( func GetFeedSchema(client *typesense.Client) *api.CollectionSchema { schema := &api.CollectionSchema{ Fields: []api.Field{ + { + Name: "feedID", + Type: "string", + }, { Name: "title", Type: "string", @@ -51,7 +55,7 @@ func GetFeedSchema(client *typesense.Client) *api.CollectionSchema { ModelName string "json:\"model_name\"" ProjectId *string "json:\"project_id,omitempty\"" }{ - ModelName: "ts/multilingual-e5-large", + ModelName: "ts/e5-small-v2", }, }, }, diff --git a/internal/webserver/server.go b/internal/webserver/server.go index 4750a3e..4f3cf73 100644 --- a/internal/webserver/server.go +++ b/internal/webserver/server.go @@ -88,7 +88,7 @@ func (s *Server) GetV1SearchFeedQuery(ctx echo.Context, query string) error { fi := make([]api.FeedItem, len(feeds)) for i, f := range feeds { fi[i] = api.FeedItem{ - Id: f.ID, + Id: f.FeedID, Source: f.Source, Date: f.Date, Language: f.Language, @@ -112,19 +112,19 @@ func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) er var feeds []app.Feed var err error - f, err := s.feedRepo.FindByID(feedID) - if err != nil { - e := api.Error{ - Code: http.StatusInternalServerError, - Message: "Internal Server Error", - } + // f, err := s.feedRepo.FindByID(feedID) + // if err != nil { + // e := api.Error{ + // Code: http.StatusInternalServerError, + // Message: "Internal Server Error", + // } - s.logger.Error("feed search", zap.Error(err)) + // s.logger.Error("feed search", zap.Error(err)) - return ctx.JSON(http.StatusInternalServerError, e) - } + // return ctx.JSON(http.StatusInternalServerError, e) + // } - feeds, err = s.feedRepo.FindBySimilarity(f) + feeds, err = s.feedRepo.FindBySimilarity(feedID) if err != nil { e := api.Error{ Code: http.StatusInternalServerError, @@ -139,7 +139,7 @@ func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) er fi := make([]api.FeedItem, len(feeds)) for i, f := range feeds { fi[i] = api.FeedItem{ - Id: f.ID, + Id: f.FeedID, Source: f.Source, Date: f.Date, Language: f.Language, @@ -156,19 +156,19 @@ func (s *Server) GetV1SearchFeedSimilarities(ctx echo.Context, feedID string) er } } - fItem := api.FeedItem{ +/* fItem := api.FeedItem{ Date: f.Date, - Id: f.ID, + Id: f.FeedID, Language: f.Language, Link: f.Link, Source: f.Source, Summary: f.Summary, Title: f.Title, } - + */ fd := api.FeedDetails{ Similarities: &fi, - Source: &fItem, + //Source: &fItem, } return ctx.JSON(http.StatusOK, fd) diff --git a/internal/x/typesense/schema.go b/internal/x/typesense/schema.go index 3454e90..332dc55 100644 --- a/internal/x/typesense/schema.go +++ b/internal/x/typesense/schema.go @@ -31,8 +31,10 @@ func updateCollection(client *typesense.Client, schema *api.CollectionSchema) er if _, err := client.Collection(schema.Name).Update(context.Background(), u); err != nil { if strings.Contains(err.Error(), "is already part of the schema") { - // TODO: capture the log of error - return nil + // This error indicates that the field is already part of the schema, + // So we need to delete the collection and recreate it. + _, err := client.Collection(schema.Name).Delete(context.Background()) + return err } return err