Skip to content

Commit 8b86929

Browse files
Merge pull request #27 from geraldohomero/transcript
Enhance transcript retrieval with retry logic and increase thread pool size for processing
2 parents ea4b2e3 + 141b5bd commit 8b86929

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

data/transcriptions/transcript.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ def main():
217217
processed_count = 0
218218

219219
# Use a thread pool with max 5 workers
220-
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
220+
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
221221
# Submit all tasks to the executor
222222
future_to_video = {
223223
executor.submit(process_video_transcript, video_id): video_id

toDatabase.py

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -128,8 +128,18 @@ def get_video_details(video_id: str, channel_id: str) -> Optional[Dict[str, Any]
128128
# Determine if comments are enabled (the API returns 'commentCount' only if enabled)
129129
comments_enabled = 'commentCount' in video['statistics']
130130

131-
# Try to get the transcript
132-
success, transcript_text, transcript_lang = get_transcript(video_id)
131+
# Try to get the transcript with one retry on failure
132+
try:
133+
success, transcript_text, transcript_lang = get_transcript(video_id)
134+
except Exception as e:
135+
logging.warning("First transcript attempt failed for video %s: %s. Trying again...", video_id, e)
136+
# Wait briefly before retry
137+
time.sleep(1)
138+
try:
139+
success, transcript_text, transcript_lang = get_transcript(video_id)
140+
except Exception as e:
141+
logging.error("Second transcript attempt also failed for video %s: %s", video_id, e)
142+
success, transcript_text, transcript_lang = False, None, None
133143

134144
return {
135145
'videoId': video_id,
@@ -422,7 +432,23 @@ def main():
422432
)
423433
conn.commit()
424434
else:
425-
logging.info("Couldn't download transcript for existing video %s: %s", video_id, transcript_text)
435+
logging.info("Couldn't download transcript for existing video %s: %s. Retrying once...", video_id, transcript_text)
436+
# Wait briefly before retry
437+
time.sleep(1)
438+
try:
439+
success, transcript_text, transcript_lang = get_transcript(video_id)
440+
except Exception as e:
441+
logging.error("Second transcript attempt also failed for existing video %s: %s", video_id, e)
442+
success = False
443+
if success:
444+
logging.info("Downloaded transcript for existing video %s on retry", video_id)
445+
cursor.execute(
446+
"UPDATE Videos SET videoTranscript = ?, transcriptLanguage = ? WHERE videoId = ?",
447+
(transcript_text, transcript_lang, video_id)
448+
)
449+
conn.commit()
450+
else:
451+
logging.info("Couldn't download transcript for existing video %s after retry: %s", video_id, transcript_text)
426452
continue
427453

428454
# Check if comments are enabled for the video.

0 commit comments

Comments
 (0)