From be99a43822ba6d32ac2b9da132eeccf99bd84979 Mon Sep 17 00:00:00 2001 From: Tamas Szelezsan Date: Thu, 21 May 2026 11:59:36 +0200 Subject: [PATCH 1/2] Add timing fields to transcript entries and SYNCED_TRANSCRIPTS feature flag --- .../pocketcasts/models/to/TranscriptEntry.kt | 2 + .../transcript/TranscripSanitization.kt | 38 +++++++++++++------ .../transcript/TranscriptParser.kt | 20 ++++++---- .../repositories/transcript/SrtParserTest.kt | 6 +-- .../transcript/WebVttParserTest.kt | 10 ++--- .../pocketcasts/utils/featureflag/Feature.kt | 8 ++++ 6 files changed, 57 insertions(+), 27 deletions(-) diff --git a/modules/services/model/src/main/java/au/com/shiftyjelly/pocketcasts/models/to/TranscriptEntry.kt b/modules/services/model/src/main/java/au/com/shiftyjelly/pocketcasts/models/to/TranscriptEntry.kt index 236f58ea67e..3ca18c09eb1 100644 --- a/modules/services/model/src/main/java/au/com/shiftyjelly/pocketcasts/models/to/TranscriptEntry.kt +++ b/modules/services/model/src/main/java/au/com/shiftyjelly/pocketcasts/models/to/TranscriptEntry.kt @@ -3,6 +3,8 @@ package au.com.shiftyjelly.pocketcasts.models.to sealed interface TranscriptEntry { data class Text( val value: String, + val startTimeMs: Long = -1L, + val endTimeMs: Long = -1L, ) : TranscriptEntry data class Speaker( diff --git a/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscripSanitization.kt b/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscripSanitization.kt index 3e845142fe1..1712ae32e5a 100644 --- a/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscripSanitization.kt +++ b/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscripSanitization.kt @@ -30,33 +30,45 @@ private val ThreeOrMoreNewLines = """\n{3,}""".toRegex() private fun List.joinSplitSentences(): List { val phraseAccumulator = StringBuilder() val entries = mutableListOf() + var accumulatedStartTimeMs = -1L + var accumulatedEndTimeMs = -1L - fun appendToAccumulator(text: String) { + fun appendToAccumulator(text: String, startTimeMs: Long, endTimeMs: Long) { phraseAccumulator.append(' ').append(text.trimStart()) + if (accumulatedStartTimeMs == -1L || (startTimeMs in 0.. accumulatedEndTimeMs) { + accumulatedEndTimeMs = endTimeMs + } } - fun buildFullSentence(text: String): TranscriptEntry { - appendToAccumulator(text) + fun buildFullSentence(text: String, startTimeMs: Long, endTimeMs: Long): TranscriptEntry { + appendToAccumulator(text, startTimeMs, endTimeMs) val sentences = phraseAccumulator.toString() phraseAccumulator.clear() - return TranscriptEntry.Text(sentences) + val resultStartTimeMs = accumulatedStartTimeMs + val resultEndTimeMs = accumulatedEndTimeMs + accumulatedStartTimeMs = -1L + accumulatedEndTimeMs = -1L + return TranscriptEntry.Text(sentences, startTimeMs = resultStartTimeMs, endTimeMs = resultEndTimeMs) } - fun buildMidSentence(text: String): TranscriptEntry? { + fun buildMidSentence(text: String, startTimeMs: Long, endTimeMs: Long): TranscriptEntry? { val midSentence = text.findMidSentence() return if (midSentence != null) { val (index, punctuation) = midSentence val midSentenceText = text.substring(0, index + punctuation.length) - val sentence = buildFullSentence(midSentenceText) + val sentence = buildFullSentence(midSentenceText, startTimeMs, endTimeMs) val leftOverText = text.drop(midSentenceText.length) - appendToAccumulator(leftOverText) + appendToAccumulator(leftOverText, startTimeMs, endTimeMs) sentence } else { - appendToAccumulator(text) + appendToAccumulator(text, startTimeMs, endTimeMs) null } } @@ -68,15 +80,19 @@ private fun List.joinSplitSentences(): List { is TranscriptEntry.Text -> { val text = entry.value if (text.endsAsSentence()) { - buildFullSentence(text) + buildFullSentence(text, entry.startTimeMs, entry.endTimeMs) } else { - buildMidSentence(text) + buildMidSentence(text, entry.startTimeMs, entry.endTimeMs) } } } } if (phraseAccumulator.isNotEmpty()) { - entries += TranscriptEntry.Text(phraseAccumulator.toString()) + entries += TranscriptEntry.Text( + phraseAccumulator.toString(), + startTimeMs = accumulatedStartTimeMs, + endTimeMs = accumulatedEndTimeMs, + ) } return entries } diff --git a/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptParser.kt b/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptParser.kt index 15723b86543..eb32b38329d 100644 --- a/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptParser.kt +++ b/modules/services/repositories/src/main/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptParser.kt @@ -33,7 +33,9 @@ internal abstract class SubtitleParser( override fun parse(source: BufferedSource) = runCatching { val data = source.use { it.readByteArray() } parseAll(data).flatMap { cuesWithTiming -> - cuesWithTiming.cues.flatMap { cue -> toEntries(cue) } + val startTimeMs = cuesWithTiming.startTimeUs / 1000 + val endTimeMs = cuesWithTiming.endTimeUs / 1000 + cuesWithTiming.cues.flatMap { cue -> toEntries(cue, startTimeMs, endTimeMs) } } } @@ -45,13 +47,13 @@ internal abstract class SubtitleParser( } } - protected abstract fun toEntries(cue: Cue): List + protected abstract fun toEntries(cue: Cue, startTimeMs: Long, endTimeMs: Long): List } internal class WebVttParser : SubtitleParser(WebvttParser()) { override val type get() = TranscriptType.Vtt - override fun toEntries(cue: Cue): List { + override fun toEntries(cue: Cue, startTimeMs: Long, endTimeMs: Long): List { val cueText = cue.text if (cueText.isNullOrEmpty()) { return emptyList() @@ -66,7 +68,7 @@ internal class WebVttParser : SubtitleParser(WebvttParser()) { add(TranscriptEntry.Speaker(speakers)) } } - add(TranscriptEntry.Text(cueText.toString())) + add(TranscriptEntry.Text(cueText.toString(), startTimeMs = startTimeMs, endTimeMs = endTimeMs)) } } } @@ -74,7 +76,7 @@ internal class WebVttParser : SubtitleParser(WebvttParser()) { internal class SrtParser : SubtitleParser(SubripParser()) { override val type get() = TranscriptType.Srt - override fun toEntries(cue: Cue): List { + override fun toEntries(cue: Cue, startTimeMs: Long, endTimeMs: Long): List { val cueText = cue.text?.toString() if (cueText.isNullOrEmpty()) { return emptyList() @@ -84,9 +86,9 @@ internal class SrtParser : SubtitleParser(SubripParser()) { val speakerGroups = SpeakerRegex.matchEntire(cueText)?.groupValues if (speakerGroups != null) { add(TranscriptEntry.Speaker(speakerGroups[1])) - add(TranscriptEntry.Text(speakerGroups[2])) + add(TranscriptEntry.Text(speakerGroups[2], startTimeMs = startTimeMs, endTimeMs = endTimeMs)) } else { - add(TranscriptEntry.Text(cueText)) + add(TranscriptEntry.Text(cueText, startTimeMs = startTimeMs, endTimeMs = endTimeMs)) } } } @@ -146,6 +148,8 @@ internal class JsonParser( cue.speaker?.let { speaker -> add(TranscriptEntry.Speaker(speaker)) } - add(TranscriptEntry.Text(cue.body)) + val startTimeMs = cue.startTime?.let { (it * 1000).toLong() } ?: -1L + val endTimeMs = cue.endTime?.let { (it * 1000).toLong() } ?: -1L + add(TranscriptEntry.Text(cue.body, startTimeMs = startTimeMs, endTimeMs = endTimeMs)) } } diff --git a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/SrtParserTest.kt b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/SrtParserTest.kt index c24619e46b8..1d08c8c2592 100644 --- a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/SrtParserTest.kt +++ b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/SrtParserTest.kt @@ -34,10 +34,10 @@ class SrtParserTest { assertEquals( listOf( - TranscriptEntry.Text("Text"), - TranscriptEntry.Text("Text with HTML tags"), + TranscriptEntry.Text("Text", startTimeMs = 0, endTimeMs = 1000), + TranscriptEntry.Text("Text with HTML tags", startTimeMs = 1000, endTimeMs = 2000), TranscriptEntry.Speaker("Speaker 1"), - TranscriptEntry.Text("Text with speaker"), + TranscriptEntry.Text("Text with speaker", startTimeMs = 2000, endTimeMs = 3000), ), entries, ) diff --git a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/WebVttParserTest.kt b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/WebVttParserTest.kt index 9d46100ca61..6bda428722b 100644 --- a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/WebVttParserTest.kt +++ b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/WebVttParserTest.kt @@ -39,14 +39,14 @@ class WebVttParserTest { assertEquals( listOf( - TranscriptEntry.Text("Text"), + TranscriptEntry.Text("Text", startTimeMs = 0, endTimeMs = 1000), TranscriptEntry.Speaker("Alice"), - TranscriptEntry.Text("Text with speaker"), - TranscriptEntry.Text("Text with decorations"), + TranscriptEntry.Text("Text with speaker", startTimeMs = 1000, endTimeMs = 2000), + TranscriptEntry.Text("Text with decorations", startTimeMs = 2000, endTimeMs = 3000), TranscriptEntry.Speaker("Bob"), - TranscriptEntry.Text("Text with speaker and with spans"), + TranscriptEntry.Text("Text with speaker and with spans", startTimeMs = 3000, endTimeMs = 4000), TranscriptEntry.Speaker("Alice, Bob"), - TranscriptEntry.Text("Text with multiple speakers"), + TranscriptEntry.Text("Text with multiple speakers", startTimeMs = 4000, endTimeMs = 5000), ), entries, ) diff --git a/modules/services/utils/src/main/java/au/com/shiftyjelly/pocketcasts/utils/featureflag/Feature.kt b/modules/services/utils/src/main/java/au/com/shiftyjelly/pocketcasts/utils/featureflag/Feature.kt index 51d127aa740..32fcafeed99 100644 --- a/modules/services/utils/src/main/java/au/com/shiftyjelly/pocketcasts/utils/featureflag/Feature.kt +++ b/modules/services/utils/src/main/java/au/com/shiftyjelly/pocketcasts/utils/featureflag/Feature.kt @@ -304,6 +304,14 @@ enum class Feature( hasFirebaseRemoteFlag = true, hasDevToggle = true, ), + SYNCED_TRANSCRIPTS( + key = "synced_transcripts", + title = "Synced transcripts with playback timing", + defaultValue = isDebugOrPrototypeBuild, + tier = FeatureTier.Free, + hasFirebaseRemoteFlag = true, + hasDevToggle = true, + ), } sealed class FeatureTier { From 63540a02c03c3bd37d7e115954ec3287864b7936 Mon Sep 17 00:00:00 2001 From: Tamas Szelezsan Date: Mon, 25 May 2026 16:00:05 +0200 Subject: [PATCH 2/2] Address Copilot comments --- .../repositories/transcript/JsonParserTest.kt | 34 +++++++++++ .../transcript/TranscriptSanitizationTest.kt | 58 ++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/JsonParserTest.kt b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/JsonParserTest.kt index 91294d6b748..1e6644310cc 100644 --- a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/JsonParserTest.kt +++ b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/JsonParserTest.kt @@ -37,4 +37,38 @@ class JsonParserTest { entries, ) } + + @Test + fun `parse json subtitles with timing`() { + val subtitles = """ + |{ + | "segments": [ + | { + | "body": "Timed text", + | "startTime": 1.5, + | "endTime": 3.75 + | }, + | { + | "body": "No timing" + | }, + | { + | "body": "Partial timing", + | "startTime": 10.0 + | } + | ] + |} + """.trimMargin() + val source = Buffer().writeUtf8(subtitles) + + val entries = parser.parse(source).getOrThrow() + + assertEquals( + listOf( + TranscriptEntry.Text("Timed text", startTimeMs = 1500, endTimeMs = 3750), + TranscriptEntry.Text("No timing", startTimeMs = -1, endTimeMs = -1), + TranscriptEntry.Text("Partial timing", startTimeMs = 10000, endTimeMs = -1), + ), + entries, + ) + } } diff --git a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptSanitizationTest.kt b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptSanitizationTest.kt index cd2b2518f51..8f6e62f671c 100644 --- a/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptSanitizationTest.kt +++ b/modules/services/repositories/src/test/java/au/com/shiftyjelly/pocketcasts/repositories/transcript/TranscriptSanitizationTest.kt @@ -195,6 +195,60 @@ class TranscriptSanitizationTest { ) } + @Test + fun `join split texts uses min start time and max end time`() { + val input = buildTranscript { + text("Text 1", startTimeMs = 1000, endTimeMs = 2000) + text("Text 2", startTimeMs = 2000, endTimeMs = 3000) + text("Text 3", startTimeMs = 3000, endTimeMs = 4000) + } + + val output = input.sanitize() + + assertEquals( + buildTranscript { + text("Text 1 Text 2 Text 3", startTimeMs = 1000, endTimeMs = 4000) + }, + output, + ) + } + + @Test + fun `mid-sentence split preserves timing`() { + val input = buildTranscript { + text("Period. Unfinished", startTimeMs = 0, endTimeMs = 1000) + text("sentence.", startTimeMs = 1000, endTimeMs = 2000) + } + + val output = input.sanitize() + + assertEquals( + buildTranscript { + text("Period.", startTimeMs = 0, endTimeMs = 1000) + text("Unfinished sentence.", startTimeMs = 0, endTimeMs = 2000) + }, + output, + ) + } + + @Test + fun `residual accumulator flush preserves timing`() { + val input = buildTranscript { + text("Finished.", startTimeMs = 0, endTimeMs = 1000) + text("Unfinished", startTimeMs = 1000, endTimeMs = 2000) + } + + val output = input.sanitize() + + assertEquals( + buildTranscript { + text("Finished.", startTimeMs = 0, endTimeMs = 1000) + text("Unfinished", startTimeMs = 1000, endTimeMs = 2000) + }, + output, + ) + } + @Test fun `move unfinished sentence to next text`() { val input = buildTranscript { @@ -360,8 +414,8 @@ private fun buildTranscript(block: TranscriptBuilder.() -> Unit): List() - fun text(value: String) { - entries += TranscriptEntry.Text(value) + fun text(value: String, startTimeMs: Long = -1L, endTimeMs: Long = -1L) { + entries += TranscriptEntry.Text(value, startTimeMs = startTimeMs, endTimeMs = endTimeMs) } fun speaker(value: String) {