Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package au.com.shiftyjelly.pocketcasts.models.to
sealed interface TranscriptEntry {
/**
 * A piece of transcript text together with an optional time range.
 *
 * @property value the text content of this entry.
 * @property startTimeMs start of the entry in milliseconds. Defaults to `-1L`, the value the
 * parsers in this change emit when a cue carries no start time.
 * @property endTimeMs end of the entry in milliseconds. Defaults to `-1L` under the same convention.
 */
data class Text(
val value: String,
val startTimeMs: Long = -1L,
val endTimeMs: Long = -1L,
) : TranscriptEntry

data class Speaker(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,45 @@ private val ThreeOrMoreNewLines = """\n{3,}""".toRegex()
private fun List<TranscriptEntry>.joinSplitSentences(): List<TranscriptEntry> {
val phraseAccumulator = StringBuilder()
val entries = mutableListOf<TranscriptEntry>()
var accumulatedStartTimeMs = -1L
var accumulatedEndTimeMs = -1L

fun appendToAccumulator(text: String) {
// Adds [text] (with leading whitespace removed, preceded by a single space) to the phrase
// being assembled and widens the accumulated time window so that it also covers this piece.
fun appendToAccumulator(text: String, startTimeMs: Long, endTimeMs: Long) {
    phraseAccumulator.append(' ')
    phraseAccumulator.append(text.trimStart())

    // The start is taken when nothing has been recorded yet (-1), or when the incoming
    // value is a non-negative timestamp strictly earlier than the recorded one.
    val nothingRecordedYet = accumulatedStartTimeMs == -1L
    val isEarlierKnownStart = startTimeMs >= 0L && startTimeMs < accumulatedStartTimeMs
    if (nothingRecordedYet || isEarlierKnownStart) {
        accumulatedStartTimeMs = startTimeMs
    }

    // The end only ever grows; a -1 input can never exceed the recorded value.
    accumulatedEndTimeMs = maxOf(accumulatedEndTimeMs, endTimeMs)
}

fun buildFullSentence(text: String): TranscriptEntry {
appendToAccumulator(text)
fun buildFullSentence(text: String, startTimeMs: Long, endTimeMs: Long): TranscriptEntry {
appendToAccumulator(text, startTimeMs, endTimeMs)
val sentences = phraseAccumulator.toString()
phraseAccumulator.clear()
return TranscriptEntry.Text(sentences)
val resultStartTimeMs = accumulatedStartTimeMs
val resultEndTimeMs = accumulatedEndTimeMs
accumulatedStartTimeMs = -1L
accumulatedEndTimeMs = -1L
return TranscriptEntry.Text(sentences, startTimeMs = resultStartTimeMs, endTimeMs = resultEndTimeMs)
}
Comment thread
sztomek marked this conversation as resolved.

fun buildMidSentence(text: String): TranscriptEntry? {
fun buildMidSentence(text: String, startTimeMs: Long, endTimeMs: Long): TranscriptEntry? {
val midSentence = text.findMidSentence()

return if (midSentence != null) {
val (index, punctuation) = midSentence

val midSentenceText = text.substring(0, index + punctuation.length)
val sentence = buildFullSentence(midSentenceText)
val sentence = buildFullSentence(midSentenceText, startTimeMs, endTimeMs)

val leftOverText = text.drop(midSentenceText.length)
appendToAccumulator(leftOverText)
appendToAccumulator(leftOverText, startTimeMs, endTimeMs)

sentence
} else {
appendToAccumulator(text)
appendToAccumulator(text, startTimeMs, endTimeMs)
null
}
}
Expand All @@ -68,15 +80,19 @@ private fun List<TranscriptEntry>.joinSplitSentences(): List<TranscriptEntry> {
is TranscriptEntry.Text -> {
val text = entry.value
if (text.endsAsSentence()) {
buildFullSentence(text)
buildFullSentence(text, entry.startTimeMs, entry.endTimeMs)
} else {
buildMidSentence(text)
buildMidSentence(text, entry.startTimeMs, entry.endTimeMs)
}
}
}
}
if (phraseAccumulator.isNotEmpty()) {
entries += TranscriptEntry.Text(phraseAccumulator.toString())
entries += TranscriptEntry.Text(
phraseAccumulator.toString(),
startTimeMs = accumulatedStartTimeMs,
endTimeMs = accumulatedEndTimeMs,
)
}
return entries
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ internal abstract class SubtitleParser(
override fun parse(source: BufferedSource) = runCatching {
val data = source.use { it.readByteArray() }
parseAll(data).flatMap { cuesWithTiming ->
cuesWithTiming.cues.flatMap { cue -> toEntries(cue) }
val startTimeMs = cuesWithTiming.startTimeUs / 1000
val endTimeMs = cuesWithTiming.endTimeUs / 1000
cuesWithTiming.cues.flatMap { cue -> toEntries(cue, startTimeMs, endTimeMs) }
}
}

Expand All @@ -45,13 +47,13 @@ internal abstract class SubtitleParser(
}
}

protected abstract fun toEntries(cue: Cue): List<TranscriptEntry>
protected abstract fun toEntries(cue: Cue, startTimeMs: Long, endTimeMs: Long): List<TranscriptEntry>
}

internal class WebVttParser : SubtitleParser(WebvttParser()) {
override val type get() = TranscriptType.Vtt

override fun toEntries(cue: Cue): List<TranscriptEntry> {
override fun toEntries(cue: Cue, startTimeMs: Long, endTimeMs: Long): List<TranscriptEntry> {
val cueText = cue.text
if (cueText.isNullOrEmpty()) {
return emptyList()
Expand All @@ -66,15 +68,15 @@ internal class WebVttParser : SubtitleParser(WebvttParser()) {
add(TranscriptEntry.Speaker(speakers))
}
}
add(TranscriptEntry.Text(cueText.toString()))
add(TranscriptEntry.Text(cueText.toString(), startTimeMs = startTimeMs, endTimeMs = endTimeMs))
}
}
}

internal class SrtParser : SubtitleParser(SubripParser()) {
override val type get() = TranscriptType.Srt

override fun toEntries(cue: Cue): List<TranscriptEntry> {
override fun toEntries(cue: Cue, startTimeMs: Long, endTimeMs: Long): List<TranscriptEntry> {
val cueText = cue.text?.toString()
if (cueText.isNullOrEmpty()) {
return emptyList()
Expand All @@ -84,9 +86,9 @@ internal class SrtParser : SubtitleParser(SubripParser()) {
val speakerGroups = SpeakerRegex.matchEntire(cueText)?.groupValues
if (speakerGroups != null) {
add(TranscriptEntry.Speaker(speakerGroups[1]))
add(TranscriptEntry.Text(speakerGroups[2]))
add(TranscriptEntry.Text(speakerGroups[2], startTimeMs = startTimeMs, endTimeMs = endTimeMs))
} else {
add(TranscriptEntry.Text(cueText))
add(TranscriptEntry.Text(cueText, startTimeMs = startTimeMs, endTimeMs = endTimeMs))
}
}
}
Expand Down Expand Up @@ -146,6 +148,8 @@ internal class JsonParser(
cue.speaker?.let { speaker ->
add(TranscriptEntry.Speaker(speaker))
}
add(TranscriptEntry.Text(cue.body))
val startTimeMs = cue.startTime?.let { (it * 1000).toLong() } ?: -1L
val endTimeMs = cue.endTime?.let { (it * 1000).toLong() } ?: -1L
add(TranscriptEntry.Text(cue.body, startTimeMs = startTimeMs, endTimeMs = endTimeMs))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,38 @@ class JsonParserTest {
entries,
)
}

// Covers three segment shapes: both timestamps present, none present, and only the start
// present. The expectations below encode that a missing timestamp is reported as -1.
@Test
fun `parse json subtitles with timing`() {
    val expected = listOf(
        TranscriptEntry.Text("Timed text", startTimeMs = 1500, endTimeMs = 3750),
        TranscriptEntry.Text("No timing", startTimeMs = -1, endTimeMs = -1),
        TranscriptEntry.Text("Partial timing", startTimeMs = 10000, endTimeMs = -1),
    )
    val json = """
        |{
        | "segments": [
        | {
        | "body": "Timed text",
        | "startTime": 1.5,
        | "endTime": 3.75
        | },
        | {
        | "body": "No timing"
        | },
        | {
        | "body": "Partial timing",
        | "startTime": 10.0
        | }
        | ]
        |}
    """.trimMargin()

    val actual = parser.parse(Buffer().writeUtf8(json)).getOrThrow()

    assertEquals(expected, actual)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ class SrtParserTest {

assertEquals(
listOf(
TranscriptEntry.Text("Text"),
TranscriptEntry.Text("Text with HTML tags"),
TranscriptEntry.Text("Text", startTimeMs = 0, endTimeMs = 1000),
TranscriptEntry.Text("Text with HTML tags", startTimeMs = 1000, endTimeMs = 2000),
TranscriptEntry.Speaker("Speaker 1"),
TranscriptEntry.Text("Text with speaker"),
TranscriptEntry.Text("Text with speaker", startTimeMs = 2000, endTimeMs = 3000),
),
entries,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,60 @@ class TranscriptSanitizationTest {
)
}

// Three unterminated fragments are merged into one entry whose window runs from the
// earliest start to the latest end of its parts.
@Test
fun `join split texts uses min start time and max end time`() {
    val expected = buildTranscript {
        text("Text 1 Text 2 Text 3", startTimeMs = 1000, endTimeMs = 4000)
    }

    val actual = buildTranscript {
        text("Text 1", startTimeMs = 1000, endTimeMs = 2000)
        text("Text 2", startTimeMs = 2000, endTimeMs = 3000)
        text("Text 3", startTimeMs = 3000, endTimeMs = 4000)
    }.sanitize()

    assertEquals(expected, actual)
}

// A cue that ends one sentence and begins another is split: the finished sentence keeps
// the cue's own window, while the remainder starts at that cue's start and ends with the
// cue that completes it.
@Test
fun `mid-sentence split preserves timing`() {
    val expected = buildTranscript {
        text("Period.", startTimeMs = 0, endTimeMs = 1000)
        text("Unfinished sentence.", startTimeMs = 0, endTimeMs = 2000)
    }

    val actual = buildTranscript {
        text("Period. Unfinished", startTimeMs = 0, endTimeMs = 1000)
        text("sentence.", startTimeMs = 1000, endTimeMs = 2000)
    }.sanitize()

    assertEquals(expected, actual)
}

// Text still pending when the input runs out is emitted as its own entry and keeps the
// timing of the cue it came from.
@Test
fun `residual accumulator flush preserves timing`() {
    val expected = buildTranscript {
        text("Finished.", startTimeMs = 0, endTimeMs = 1000)
        text("Unfinished", startTimeMs = 1000, endTimeMs = 2000)
    }

    val actual = buildTranscript {
        text("Finished.", startTimeMs = 0, endTimeMs = 1000)
        text("Unfinished", startTimeMs = 1000, endTimeMs = 2000)
    }.sanitize()

    assertEquals(expected, actual)
}

@Test
fun `move unfinished sentence to next text`() {
val input = buildTranscript {
Expand Down Expand Up @@ -360,8 +414,8 @@ private fun buildTranscript(block: TranscriptBuilder.() -> Unit): List<Transcrip
private class TranscriptBuilder {
private val entries = mutableListOf<TranscriptEntry>()

fun text(value: String) {
entries += TranscriptEntry.Text(value)
// Records a text entry; either timestamp falls back to -1 when the caller leaves it out.
fun text(value: String, startTimeMs: Long = -1L, endTimeMs: Long = -1L) {
    val entry = TranscriptEntry.Text(value, startTimeMs, endTimeMs)
    entries.add(entry)
}

fun speaker(value: String) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ class WebVttParserTest {

assertEquals(
listOf(
TranscriptEntry.Text("Text"),
TranscriptEntry.Text("Text", startTimeMs = 0, endTimeMs = 1000),
TranscriptEntry.Speaker("Alice"),
TranscriptEntry.Text("Text with speaker"),
TranscriptEntry.Text("Text with decorations"),
TranscriptEntry.Text("Text with speaker", startTimeMs = 1000, endTimeMs = 2000),
TranscriptEntry.Text("Text with decorations", startTimeMs = 2000, endTimeMs = 3000),
TranscriptEntry.Speaker("Bob"),
TranscriptEntry.Text("Text with speaker and with spans"),
TranscriptEntry.Text("Text with speaker and with spans", startTimeMs = 3000, endTimeMs = 4000),
TranscriptEntry.Speaker("Alice, Bob"),
TranscriptEntry.Text("Text with multiple speakers"),
TranscriptEntry.Text("Text with multiple speakers", startTimeMs = 4000, endTimeMs = 5000),
),
entries,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,14 @@ enum class Feature(
hasFirebaseRemoteFlag = true,
hasDevToggle = true,
),
// Flag for the "Synced transcripts with playback timing" feature.
// Its default value follows isDebugOrPrototypeBuild; it is declared on the Free tier,
// has a Firebase remote flag, and exposes a developer toggle.
SYNCED_TRANSCRIPTS(
key = "synced_transcripts",
title = "Synced transcripts with playback timing",
defaultValue = isDebugOrPrototypeBuild,
tier = FeatureTier.Free,
hasFirebaseRemoteFlag = true,
hasDevToggle = true,
),
}

sealed class FeatureTier {
Expand Down