Skip to content

Commit bf3e92b

Browse files
kyteinsky authored and marcelklehr committed
fix: expand the source id regex item part

":" is required for the mail integration to work, but the regex has been expanded to allow alphanumeric text and hyphens too.

Signed-off-by: Anupam Kumar <[email protected]>
1 parent 592f5dc commit bf3e92b

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

context_chat_backend/chain/ingest/injest.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,16 @@ def embed_sources(
186 186
f'{source.filename} ({_decode_latin_1(source.headers["title"])})'
187 187
for source in sources_filtered
188 188
],
189 +
'invalid_source_ids': [
190 +
source.filename for source in sources
191 +
if not is_valid_source_id(source.filename) # pyright: ignore[reportArgumentType]
192 +
],
193 +
'not_allowed_file_ids': [
194 +
source.filename for source in sources
195 +
if not _allowed_file(source)
196 +
],
197 +
'len(source_ids)': len(sources_filtered),
198 +
'len(total_source_ids)': len(sources),
189 199
})
190 200

191 201
vectordb = vectordb_loader.load()

context_chat_backend/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,8 @@ def exec_in_proc(group=None, target=None, name=None, args=(), kwargs={}, *, daem
100 100

101 101

102 102
def is_valid_source_id(source_id: str) -> bool:
103 -
return re.match(r'^[a-zA-Z0-9_-]+__[a-zA-Z0-9_-]+: \d+$', source_id) is not None
103 +
# note the ":" in the item id part
104 +
return re.match(r'^[a-zA-Z0-9_-]+__[a-zA-Z0-9_-]+: [a-zA-Z0-9:-]+$', source_id) is not None
104 105

105 106

106 107
def is_valid_provider_id(provider_id: str) -> bool:

0 commit comments

Comments
 (0)