Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ import androidx.test.platform.app.InstrumentationRegistry
import au.com.shiftyjelly.pocketcasts.analytics.testing.TestEventSink
import au.com.shiftyjelly.pocketcasts.models.db.AppDatabase
import au.com.shiftyjelly.pocketcasts.models.db.dao.EpisodeDao
import au.com.shiftyjelly.pocketcasts.models.db.dao.TranscriptDao
import au.com.shiftyjelly.pocketcasts.models.di.ModelModule
import au.com.shiftyjelly.pocketcasts.models.di.addTypeConverters
import au.com.shiftyjelly.pocketcasts.models.entity.PodcastEpisode
import au.com.shiftyjelly.pocketcasts.models.type.SyncStatus
import au.com.shiftyjelly.pocketcasts.preferences.model.BookmarksSortTypeDefault
import au.com.shiftyjelly.pocketcasts.repositories.sync.SyncManager
import au.com.shiftyjelly.pocketcasts.repositories.transcript.TranscriptWindowExtractor
import au.com.shiftyjelly.pocketcasts.servers.podcast.TranscriptService
import com.automattic.eventhorizon.BookmarkSourceType
import com.automattic.eventhorizon.EventHorizon
import com.squareup.moshi.Moshi
Expand All @@ -25,6 +29,7 @@ import org.junit.Assert.assertNotNull
import org.junit.Assert.assertNull
import org.junit.Before
import org.junit.Test
import org.mockito.kotlin.mock

class BookmarkManagerTest {
private lateinit var appDatabase: AppDatabase
Expand All @@ -40,6 +45,11 @@ class BookmarkManagerTest {
bookmarkManager = BookmarkManagerImpl(
appDatabase = appDatabase,
eventHorizon = EventHorizon(TestEventSink()),
syncManager = mock<SyncManager>(),
transcriptWindowExtractor = TranscriptWindowExtractor(
transcriptDao = mock<TranscriptDao>(),
transcriptService = mock<TranscriptService>(),
),
)
episodeDao = appDatabase.episodeDao()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ interface BookmarkManager {
sortType: BookmarksSortTypeForProfile,
): Flow<List<Bookmark>>
fun hasBookmarksFlow(episodeUuid: String): Flow<Boolean>
/**
 * Tries to enrich [bookmark] with a generated title and summary.
 *
 * In `BookmarkManagerImpl` this extracts a transcript excerpt around the bookmark's
 * timestamp, sends it to `SyncManager.enrichBookmark`, and stores the returned title and
 * summary only when both are present. That implementation logs and swallows every failure
 * except coroutine cancellation, which it rethrows.
 */
suspend fun enrichBookmark(bookmark: Bookmark)

var sourceView: SourceView
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import au.com.shiftyjelly.pocketcasts.models.type.SyncStatus
import au.com.shiftyjelly.pocketcasts.preferences.model.BookmarksSortTypeDefault
import au.com.shiftyjelly.pocketcasts.preferences.model.BookmarksSortTypeForPodcast
import au.com.shiftyjelly.pocketcasts.preferences.model.BookmarksSortTypeForProfile
import au.com.shiftyjelly.pocketcasts.repositories.sync.SyncManager
import au.com.shiftyjelly.pocketcasts.repositories.transcript.TranscriptWindowExtractor
import com.automattic.eventhorizon.BookmarkCreatedEvent
import com.automattic.eventhorizon.BookmarkSourceType
import com.automattic.eventhorizon.BookmarkUpdateTitleEvent
Expand All @@ -18,16 +20,20 @@ import java.util.Date
import java.util.UUID
import javax.inject.Inject
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.cancellation.CancellationException
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flatMapLatest
import kotlinx.coroutines.flow.flowOf
import timber.log.Timber

class BookmarkManagerImpl @Inject constructor(
appDatabase: AppDatabase,
private val eventHorizon: EventHorizon,
private val syncManager: SyncManager,
private val transcriptWindowExtractor: TranscriptWindowExtractor,
) : BookmarkManager,
Comment thread
sztomek marked this conversation as resolved.
CoroutineScope {

Expand Down Expand Up @@ -226,4 +232,35 @@ class BookmarkManagerImpl @Inject constructor(
/** Delegates to the bookmark DAO's flow for the episode identified by [episodeUuid]. */
override fun hasBookmarksFlow(episodeUuid: String): Flow<Boolean> =
    bookmarkDao.hasBookmarksFlow(episodeUuid)

/**
 * Best-effort enrichment of [bookmark] with a title and summary returned by the server.
 *
 * Steps: extract a transcript excerpt around the bookmark's timestamp, send it to
 * [SyncManager.enrichBookmark], and persist the returned title and summary when both are
 * present. Returns silently when no excerpt is available or the response is incomplete.
 * Any failure other than coroutine cancellation is logged and swallowed.
 */
override suspend fun enrichBookmark(bookmark: Bookmark) {
    try {
        val transcriptSnippet = transcriptWindowExtractor.extractWindow(
            episodeUuid = bookmark.episodeUuid,
            timeSecs = bookmark.timeSecs,
        )
        if (transcriptSnippet == null) return

        val enrichment = syncManager.enrichBookmark(transcriptSnippet = transcriptSnippet)
        if (enrichment.error != null) {
            // An error is only reported; any title/summary in the same response is still used.
            Timber.w("Smart bookmark enrichment returned error for ${bookmark.uuid}: ${enrichment.error}")
        }

        val aiTitle = enrichment.title ?: return
        val aiSummary = enrichment.summary ?: return

        // Both fields share one modification timestamp.
        val modifiedAt = System.currentTimeMillis()
        bookmarkDao.updateAiData(
            bookmarkUuid = bookmark.uuid,
            aiTitle = aiTitle,
            aiSummary = aiSummary,
            aiTitleModified = modifiedAt,
            aiSummaryModified = modifiedAt,
            syncStatus = SyncStatus.NOT_SYNCED,
        )
    } catch (cancellation: CancellationException) {
        // Cancellation must propagate so structured concurrency keeps working.
        throw cancellation
    } catch (failure: Exception) {
        Timber.e(failure, "Smart bookmark enrichment failed for ${bookmark.uuid}")
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import au.com.shiftyjelly.pocketcasts.servers.sync.SubscriptionStatusResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.UpNextSyncRequest
import au.com.shiftyjelly.pocketcasts.servers.sync.UpNextSyncResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.UserChangeResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.bookmark.BookmarkEnrichResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.history.HistoryYearResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.login.ExchangeSonosResponse
import au.com.shiftyjelly.pocketcasts.utils.Optional
Expand Down Expand Up @@ -124,6 +125,7 @@ interface SyncManager : NamedSettingsCaller {
suspend fun upNextSync(request: UpNextSyncRequest): UpNextSyncResponse
suspend fun upNextSyncProtobuf(request: UpNextSyncRequestProtobuf): UpNextResponse
suspend fun getBookmarks(): List<Bookmark>
/**
 * Requests bookmark enrichment for [transcriptSnippet] and returns the service response.
 *
 * `SyncManagerImpl` wraps the snippet in a `BookmarkEnrichRequest` and forwards it,
 * together with a token obtained through `getCacheTokenOrLogin`, to the sync service manager.
 */
suspend fun enrichBookmark(transcriptSnippet: String): BookmarkEnrichResponse
suspend fun sendAnonymousFeedback(subject: String, inbox: String, message: String): Response<Void>
suspend fun sendFeedback(subject: String, inbox: String, message: String): Response<Void>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ import au.com.shiftyjelly.pocketcasts.servers.sync.SyncServiceManager
import au.com.shiftyjelly.pocketcasts.servers.sync.UpNextSyncRequest
import au.com.shiftyjelly.pocketcasts.servers.sync.UpNextSyncResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.UserChangeResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.bookmark.BookmarkEnrichRequest
import au.com.shiftyjelly.pocketcasts.servers.sync.bookmark.BookmarkEnrichResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.bookmark.toBookmark
import au.com.shiftyjelly.pocketcasts.servers.sync.exception.RefreshTokenExpiredException
import au.com.shiftyjelly.pocketcasts.servers.sync.history.HistoryYearResponse
Expand Down Expand Up @@ -447,6 +449,15 @@ class SyncManagerImpl @Inject constructor(
}
}

/**
 * Wraps [transcriptSnippet] in a [BookmarkEnrichRequest] and submits it through the sync
 * service manager, using the token supplied by `getCacheTokenOrLogin`.
 */
override suspend fun enrichBookmark(transcriptSnippet: String): BookmarkEnrichResponse {
    val enrichRequest = BookmarkEnrichRequest(transcriptSnippet = transcriptSnippet)
    return getCacheTokenOrLogin { accessToken ->
        syncServiceManager.enrichBookmark(
            request = enrichRequest,
            token = accessToken,
        )
    }
}

/** Forwards the feedback fields unchanged to the sync service manager. */
override suspend fun sendAnonymousFeedback(subject: String, inbox: String, message: String): Response<Void> =
    syncServiceManager.sendAnonymousFeedback(subject, inbox, message)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package au.com.shiftyjelly.pocketcasts.repositories.transcript

import au.com.shiftyjelly.pocketcasts.models.db.dao.TranscriptDao
import au.com.shiftyjelly.pocketcasts.servers.podcast.TranscriptService
import javax.inject.Inject
import javax.inject.Singleton
import kotlin.coroutines.cancellation.CancellationException
import kotlin.time.Duration.Companion.minutes
import kotlinx.coroutines.flow.filter
import kotlinx.coroutines.flow.firstOrNull
import kotlinx.coroutines.withTimeoutOrNull
import okhttp3.CacheControl
import timber.log.Timber

/**
 * Extracts a plain-text excerpt of an episode's generated transcript around a playback position.
 *
 * The transcript is looked up through [TranscriptDao], its WebVTT body is requested from
 * [TranscriptService] with [CacheControl.FORCE_CACHE], and the cues overlapping the requested
 * time window are joined into a single string.
 */
@Singleton
class TranscriptWindowExtractor @Inject constructor(
    private val transcriptDao: TranscriptDao,
    private val transcriptService: TranscriptService,
) {
    /**
     * Returns the transcript text spoken between `timeSecs - windowSecs` and
     * `timeSecs + windowSecs`, or `null` when no usable excerpt can be produced.
     *
     * `null` is returned when no non-empty transcript list is observed within one minute, when
     * none of the transcripts is flagged as generated, when the transcript body cannot be
     * obtained, when the excerpt is too short (see [parseVttWindow]), or when an unexpected
     * exception occurs (which is logged).
     *
     * @param episodeUuid episode whose transcripts are looked up.
     * @param timeSecs centre of the window, in seconds from the start of the episode.
     * @param windowSecs half-width of the window in seconds.
     * @throws CancellationException when the calling coroutine is cancelled; cancellation is
     * never converted into a `null` result.
     */
    suspend fun extractWindow(episodeUuid: String, timeSecs: Int, windowSecs: Int = 30): String? {
        return try {
            val transcripts = withTimeoutOrNull(1.minutes) {
                transcriptDao.observeTranscripts(episodeUuid)
                    .filter { it.isNotEmpty() }
                    .firstOrNull()
            }
            val generated = transcripts?.firstOrNull { it.isGenerated } ?: return null

            // A failed fetch is an expected outcome and silently yields null. This is a plain
            // try/catch rather than runCatching because runCatching would also swallow
            // CancellationException and break cooperative cancellation.
            val body = try {
                transcriptService.getTranscriptOrThrow(generated.url, CacheControl.FORCE_CACHE)
            } catch (e: CancellationException) {
                throw e
            } catch (e: Exception) {
                null
            }
            if (body == null) return null

            val vttContent = body.use { it.string() }
            parseVttWindow(vttContent, timeSecs, windowSecs)
        } catch (e: CancellationException) {
            throw e
        } catch (e: Exception) {
            Timber.e(e, "Failed to extract transcript window for episode $episodeUuid")
            null
        }
    }

    companion object {
        /** Minimum number of whitespace-separated words an excerpt must contain to be returned. */
        private const val MIN_WORD_COUNT = 10

        // Cue timing line: "[hh:]mm:ss.ttt --> [hh:]mm:ss.ttt". Hours are optional on each side.
        private val TIMESTAMP_REGEX =
            """(?:(\d{2}):)?(\d{2}):(\d{2})\.(\d{3})\s*-->\s*(?:(\d{2}):)?(\d{2}):(\d{2})\.(\d{3})""".toRegex()
        private val HTML_TAG_REGEX = """<[^>]+>""".toRegex()
        private val WHITESPACE_REGEX = """\s+""".toRegex()

        /**
         * Collects the text of every WebVTT cue in [content] that overlaps the window
         * `[timeSecs - windowSecs, timeSecs + windowSecs]`. The lower bound is clamped to 0.
         *
         * Overlap is evaluated with millisecond precision, so a cue ending a fraction of a
         * second after the window start is still included. A cue that merely touches a window
         * boundary does not count as overlapping. Markup tags such as `<v Speaker>` are removed
         * from the cue text, and the lines and cues are joined with single spaces.
         *
         * @return the joined text, or `null` when it has fewer than [MIN_WORD_COUNT] words.
         */
        internal fun parseVttWindow(content: String, timeSecs: Int, windowSecs: Int): String? {
            // Long arithmetic keeps the millisecond conversion safe for any Int input.
            val windowStartMs = (timeSecs.toLong() - windowSecs).coerceAtLeast(0L) * 1000
            val windowEndMs = (timeSecs.toLong() + windowSecs) * 1000
            val lines = content.lines()
            val texts = mutableListOf<String>()
            var i = 0

            while (i < lines.size) {
                val match = TIMESTAMP_REGEX.find(lines[i])
                i++
                if (match == null) continue

                val (sh, sm, ss, sms, eh, em, es, ems) = match.destructured
                val startMs = toMillis(sh, sm, ss, sms)
                val endMs = toMillis(eh, em, es, ems)
                val inWindow = startMs < windowEndMs && endMs > windowStartMs

                // The cue payload runs up to the next blank line. It is consumed either way so
                // that payload text is never re-scanned as a potential timing line.
                val cueLines = mutableListOf<String>()
                while (i < lines.size && lines[i].isNotBlank()) {
                    if (inWindow) {
                        val cleaned = lines[i].replace(HTML_TAG_REGEX, "").trim()
                        // Lines that consisted only of markup would otherwise add stray spaces.
                        if (cleaned.isNotEmpty()) cueLines += cleaned
                    }
                    i++
                }
                if (cueLines.isNotEmpty()) {
                    texts += cueLines.joinToString(" ")
                }
            }

            val result = texts.joinToString(" ")
            return result.takeIf { it.split(WHITESPACE_REGEX).size >= MIN_WORD_COUNT }
        }

        /** Converts captured timestamp components to milliseconds; blank [hours] means zero. */
        private fun toMillis(hours: String, minutes: String, seconds: String, millis: String): Long {
            val h = hours.ifEmpty { "0" }.toLong()
            return ((h * 60 + minutes.toLong()) * 60 + seconds.toLong()) * 1000 + millis.toLong()
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package au.com.shiftyjelly.pocketcasts.repositories.transcript

import org.junit.Assert.assertEquals
import org.junit.Assert.assertNull
import org.junit.Test

/**
 * Unit tests for [TranscriptWindowExtractor.parseVttWindow]: window selection, the minimum
 * word threshold, short `mm:ss.ttt` timestamps, and markup stripping.
 */
class TranscriptWindowExtractorTest {

    // Six contiguous cues covering 0-50s, followed by an isolated cue at 60-70s.
    private val episodeVtt: String
        get() = """
            |WEBVTT
            |
            |00:00:00.000 --> 00:00:05.000
            |Welcome to the show everyone.
            |
            |00:00:05.000 --> 00:00:10.000
            |Today we are going to discuss artificial intelligence.
            |
            |00:00:10.000 --> 00:00:20.000
            |Let me start by defining what AI actually means in practice.
            |
            |00:00:20.000 --> 00:00:30.000
            |AI is a broad field that includes machine learning, deep learning, and more.
            |
            |00:00:30.000 --> 00:00:40.000
            |The recent advances have been truly remarkable for the industry.
            |
            |00:00:40.000 --> 00:00:50.000
            |Companies are investing billions of dollars into AI research.
            |
            |00:01:00.000 --> 00:01:10.000
            |This is a much later segment about totally different things.
        """.trimMargin()

    @Test
    fun `extract window around middle of transcript`() {
        val expected = listOf(
            "Let me start by defining what AI actually means in practice.",
            "AI is a broad field that includes machine learning, deep learning, and more.",
            "The recent advances have been truly remarkable for the industry.",
        ).joinToString(" ")

        val window = TranscriptWindowExtractor.parseVttWindow(episodeVtt, timeSecs = 25, windowSecs = 15)

        assertEquals(expected, window)
    }

    @Test
    fun `extract window at start of transcript`() {
        val expected = "Welcome to the show everyone. Today we are going to discuss artificial intelligence."

        val window = TranscriptWindowExtractor.parseVttWindow(episodeVtt, timeSecs = 0, windowSecs = 10)

        assertEquals(expected, window)
    }

    @Test
    fun `return null when window has too few words`() {
        val fourWordVtt = """
            |WEBVTT
            |
            |00:00:00.000 --> 00:00:05.000
            |Just a few words.
        """.trimMargin()

        val window = TranscriptWindowExtractor.parseVttWindow(fourWordVtt, timeSecs = 2, windowSecs = 30)

        assertNull(window)
    }

    @Test
    fun `return null when no cues in window`() {
        // 290-310s lies well past the last cue of the fixture.
        val window = TranscriptWindowExtractor.parseVttWindow(episodeVtt, timeSecs = 300, windowSecs = 10)

        assertNull(window)
    }

    @Test
    fun `extract window from mm-ss-mmm timestamps`() {
        val hourlessVtt = """
            |WEBVTT
            |
            |00:00.000 --> 00:05.000
            |Welcome to the show everyone.
            |
            |00:05.000 --> 00:10.000
            |Today we are going to discuss artificial intelligence.
            |
            |00:10.000 --> 00:20.000
            |Let me start by defining what AI actually means in practice.
            |
            |00:20.000 --> 00:30.000
            |AI is a broad field that includes machine learning, deep learning, and more.
            |
            |00:30.000 --> 00:40.000
            |The recent advances have been truly remarkable for the industry.
        """.trimMargin()
        val expected = listOf(
            "Today we are going to discuss artificial intelligence.",
            "Let me start by defining what AI actually means in practice.",
            "AI is a broad field that includes machine learning, deep learning, and more.",
        ).joinToString(" ")

        val window = TranscriptWindowExtractor.parseVttWindow(hourlessVtt, timeSecs = 15, windowSecs = 10)

        assertEquals(expected, window)
    }

    @Test
    fun `strip html tags from cue text`() {
        val speakerTaggedVtt = """
            |WEBVTT
            |
            |00:00:00.000 --> 00:00:10.000
            |<v Alice>This is a sentence with enough words to pass the minimum threshold for extraction.
        """.trimMargin()
        val expected = "This is a sentence with enough words to pass the minimum threshold for extraction."

        val window = TranscriptWindowExtractor.parseVttWindow(speakerTaggedVtt, timeSecs = 5, windowSecs = 10)

        assertEquals(expected, window)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package au.com.shiftyjelly.pocketcasts.servers.sync

import au.com.shiftyjelly.pocketcasts.models.to.HistorySyncRequest
import au.com.shiftyjelly.pocketcasts.models.to.HistorySyncResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.bookmark.BookmarkEnrichRequest
import au.com.shiftyjelly.pocketcasts.servers.sync.bookmark.BookmarkEnrichResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.forgotpassword.ForgotPasswordRequest
import au.com.shiftyjelly.pocketcasts.servers.sync.forgotpassword.ForgotPasswordResponse
import au.com.shiftyjelly.pocketcasts.servers.sync.history.HistoryYearResponse
Expand Down Expand Up @@ -180,6 +182,9 @@ interface SyncService {
@POST("/user/bookmark/list")
suspend fun getBookmarkList(@Header("Authorization") authorization: String, @Body request: BookmarkRequest): BookmarksResponse

/**
 * POSTs [request] as the body to `/user/bookmark/enrich`, sending [authorization] as the
 * `Authorization` header, and returns the response as a [BookmarkEnrichResponse].
 */
@POST("/user/bookmark/enrich")
suspend fun enrichBookmark(@Header("Authorization") authorization: String, @Body request: BookmarkEnrichRequest): BookmarkEnrichResponse

@Headers("Content-Type: application/octet-stream")
@POST("/user/podcast_rating/add")
suspend fun addPodcastRating(@Header("Authorization") authorization: String, @Body request: PodcastRatingAddRequest): PodcastRatingResponse
Expand Down
Loading