Skip to content

Commit

Permalink
Issue mozilla-mobile#8690: Specify the frecency threshold for the fet…
Browse files Browse the repository at this point in the history
…ched top frecent sites
  • Loading branch information
gabrielluong committed Nov 4, 2020
1 parent 06a2562 commit 7b76a38
Show file tree
Hide file tree
Showing 12 changed files with 209 additions and 83 deletions.
2 changes: 1 addition & 1 deletion buildSrc/src/main/java/Dependencies.kt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ object Versions {
const val disklrucache = "2.0.2"
const val leakcanary = "2.4"

const val mozilla_appservices = "63.0.0"
const val mozilla_appservices = "66.0.0"

const val mozilla_glean = "33.0.4"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
package mozilla.components.browser.storage.memory

import androidx.annotation.VisibleForTesting
import mozilla.components.concept.storage.FrecencyThresholdOption
import mozilla.components.concept.storage.HistoryAutocompleteResult
import mozilla.components.concept.storage.HistoryStorage
import mozilla.components.concept.storage.PageObservation
import mozilla.components.concept.storage.SearchResult
import mozilla.components.concept.storage.PageVisit
import mozilla.components.concept.storage.RedirectSource
import mozilla.components.concept.storage.SearchResult
import mozilla.components.concept.storage.TopFrecentSiteInfo
import mozilla.components.concept.storage.VisitInfo
import mozilla.components.concept.storage.VisitType
Expand All @@ -28,6 +29,7 @@ const val AUTOCOMPLETE_SOURCE_NAME = "memoryHistory"
class InMemoryHistoryStorage : HistoryStorage {
@VisibleForTesting
internal var pages: HashMap<String, MutableList<Visit>> = linkedMapOf()

@VisibleForTesting
internal val pageMeta: HashMap<String, PageObservation> = hashMapOf()

Expand All @@ -50,9 +52,10 @@ class InMemoryHistoryStorage : HistoryStorage {
}
}

override suspend fun recordObservation(uri: String, observation: PageObservation) = synchronized(pageMeta) {
pageMeta[uri] = observation
}
override suspend fun recordObservation(uri: String, observation: PageObservation) =
synchronized(pageMeta) {
pageMeta[uri] = observation
}

override suspend fun getVisited(uris: List<String>): List<Boolean> = synchronized(pages) {
return uris.map {
Expand All @@ -67,7 +70,11 @@ class InMemoryHistoryStorage : HistoryStorage {
return pages.keys.toList()
}

override suspend fun getVisitsPaginated(offset: Long, count: Long, excludeTypes: List<VisitType>): List<VisitInfo> {
override suspend fun getVisitsPaginated(
offset: Long,
count: Long,
excludeTypes: List<VisitType>
): List<VisitInfo> {
throw UnsupportedOperationException("Pagination is not yet supported by the in-memory history storage")
}

Expand All @@ -80,57 +87,78 @@ class InMemoryHistoryStorage : HistoryStorage {

pages.forEach {
it.value.forEach { visit ->
if (visit.timestamp >= start && visit.timestamp <= end && !excludeTypes.contains(visit.type)) {
visits.add(VisitInfo(
url = it.key,
title = pageMeta[it.key]?.title,
visitTime = visit.timestamp,
visitType = visit.type
))
if (visit.timestamp >= start && visit.timestamp <= end && !excludeTypes.contains(
visit.type
)
) {
visits.add(
VisitInfo(
url = it.key,
title = pageMeta[it.key]?.title,
visitTime = visit.timestamp,
visitType = visit.type
)
)
}
}
}

return visits
}

override suspend fun getTopFrecentSites(numItems: Int): List<TopFrecentSiteInfo> {
override suspend fun getTopFrecentSites(
numItems: Int,
frecencyThreshold: FrecencyThresholdOption
): List<TopFrecentSiteInfo> {
throw UnsupportedOperationException("getTopFrecentSites is not yet supported by the in-memory history storage")
}

override fun getSuggestions(query: String, limit: Int): List<SearchResult> = synchronized(pages + pageMeta) {
data class Hit(val url: String, val score: Int)
override fun getSuggestions(query: String, limit: Int): List<SearchResult> =
synchronized(pages + pageMeta) {
data class Hit(val url: String, val score: Int)

val urlMatches = pages.asSequence().map {
Hit(it.key, levenshteinDistance(it.key, query))
}
val titleMatches = pageMeta.asSequence().map {
Hit(it.key, levenshteinDistance(it.value.title ?: "", query))
}
val matchedUrls = mutableMapOf<String, Int>()
urlMatches.plus(titleMatches).forEach {
if (matchedUrls.containsKey(it.url) && matchedUrls[it.url]!! < it.score) {
matchedUrls[it.url] = it.score
} else {
matchedUrls[it.url] = it.score
val urlMatches = pages.asSequence().map {
Hit(it.key, levenshteinDistance(it.key, query))
}
val titleMatches = pageMeta.asSequence().map {
Hit(it.key, levenshteinDistance(it.value.title ?: "", query))
}
val matchedUrls = mutableMapOf<String, Int>()
urlMatches.plus(titleMatches).forEach {
if (matchedUrls.containsKey(it.url) && matchedUrls[it.url]!! < it.score) {
matchedUrls[it.url] = it.score
} else {
matchedUrls[it.url] = it.score
}
}
// Calculate maxScore so that we can invert our scoring.
// Lower Levenshtein distance should produce a higher score.
val maxScore =
urlMatches.maxByOrNull { it.score }?.score ?: return@synchronized listOf()

// TODO exclude non-matching results entirely? Score that implies complete mismatch.
matchedUrls.asSequence().sortedBy { it.value }.map {
SearchResult(
id = it.key,
score = maxScore - it.value,
url = it.key,
title = pageMeta[it.key]?.title
)
}.take(limit).toList()
}
// Calculate maxScore so that we can invert our scoring.
// Lower Levenshtein distance should produce a higher score.
val maxScore = urlMatches.maxByOrNull { it.score }?.score ?: return@synchronized listOf()

// TODO exclude non-matching results entirely? Score that implies complete mismatch.
matchedUrls.asSequence().sortedBy { it.value }.map {
SearchResult(id = it.key, score = maxScore - it.value, url = it.key, title = pageMeta[it.key]?.title)
}.take(limit).toList()
}

override fun getAutocompleteSuggestion(query: String): HistoryAutocompleteResult? = synchronized(pages) {
return segmentAwareDomainMatch(query, pages.keys)?.let { urlMatch ->
HistoryAutocompleteResult(
query, urlMatch.matchedSegment, urlMatch.url, AUTOCOMPLETE_SOURCE_NAME, pages.size)
override fun getAutocompleteSuggestion(query: String): HistoryAutocompleteResult? =
synchronized(pages) {
return segmentAwareDomainMatch(query, pages.keys)?.let { urlMatch ->
HistoryAutocompleteResult(
query,
urlMatch.matchedSegment,
urlMatch.url,
AUTOCOMPLETE_SOURCE_NAME,
pages.size
)
}
}
}

override suspend fun deleteEverything() = synchronized(pages + pageMeta) {
pages.clear()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,20 @@ import kotlinx.coroutines.withContext
import mozilla.appservices.places.PlacesApi
import mozilla.appservices.places.PlacesException
import mozilla.appservices.places.VisitObservation
import mozilla.components.concept.base.crash.CrashReporting
import mozilla.components.concept.storage.FrecencyThresholdOption
import mozilla.components.concept.storage.HistoryAutocompleteResult
import mozilla.components.concept.storage.HistoryStorage
import mozilla.components.concept.storage.PageObservation
import mozilla.components.concept.storage.PageVisit
import mozilla.components.concept.storage.SearchResult
import mozilla.components.concept.storage.RedirectSource
import mozilla.components.concept.storage.SearchResult
import mozilla.components.concept.storage.TopFrecentSiteInfo
import mozilla.components.concept.storage.VisitInfo
import mozilla.components.concept.storage.VisitType
import mozilla.components.concept.sync.SyncAuthInfo
import mozilla.components.concept.sync.SyncStatus
import mozilla.components.concept.sync.SyncableStore
import mozilla.components.concept.base.crash.CrashReporting
import mozilla.components.support.base.log.logger.Logger
import mozilla.components.support.utils.segmentAwareDomainMatch
import org.json.JSONObject
Expand Down Expand Up @@ -100,9 +101,13 @@ open class PlacesHistoryStorage(
}
}

override suspend fun getTopFrecentSites(numItems: Int): List<TopFrecentSiteInfo> {
override suspend fun getTopFrecentSites(
numItems: Int,
frecencyThreshold: FrecencyThresholdOption
): List<TopFrecentSiteInfo> {
return withContext(readScope.coroutineContext) {
places.reader().getTopFrecentSiteInfos(numItems).map { it.into() }
places.reader().getTopFrecentSiteInfos(numItems, frecencyThreshold.into())
.map { it.into() }
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

@file:Suppress("MatchingDeclarationName")

package mozilla.components.browser.storage.sync

import mozilla.appservices.places.BookmarkFolder
Expand All @@ -12,6 +13,7 @@ import mozilla.appservices.places.BookmarkTreeNode
import mozilla.appservices.places.SyncAuthInfo
import mozilla.components.concept.storage.BookmarkNode
import mozilla.components.concept.storage.BookmarkNodeType
import mozilla.components.concept.storage.FrecencyThresholdOption
import mozilla.components.concept.storage.TopFrecentSiteInfo
import mozilla.components.concept.storage.VisitInfo
import mozilla.components.concept.storage.VisitType
Expand All @@ -31,6 +33,15 @@ internal fun mozilla.components.concept.sync.SyncAuthInfo.into(): SyncAuthInfo {
)
}

/**
* Conversion from a generic [FrecencyThresholdOption] into its richer comrade within the 'places' lib.
*/
internal fun FrecencyThresholdOption.into() = when (this) {
FrecencyThresholdOption.NONE -> mozilla.appservices.places.FrecencyThresholdOption.NONE
FrecencyThresholdOption.SKIP_ONE_TIME_PAGES ->
mozilla.appservices.places.FrecencyThresholdOption.SKIP_ONE_TIME_PAGES
}

/**
* Conversion from a generic [VisitType] into its richer comrade within the 'places' lib.
*/
Expand Down Expand Up @@ -79,14 +90,37 @@ internal fun mozilla.appservices.places.TopFrecentSiteInfo.into(): TopFrecentSit
internal fun BookmarkTreeNode.asBookmarkNode(): BookmarkNode {
return when (this) {
is BookmarkItem -> {
BookmarkNode(BookmarkNodeType.ITEM, this.guid, this.parentGUID, this.position, this.title, this.url, null)
BookmarkNode(
BookmarkNodeType.ITEM,
this.guid,
this.parentGUID,
this.position,
this.title,
this.url,
null
)
}
is BookmarkFolder -> {
BookmarkNode(BookmarkNodeType.FOLDER, this.guid, this.parentGUID, this.position, this.title, null,
this.children?.map(BookmarkTreeNode::asBookmarkNode))
BookmarkNode(
BookmarkNodeType.FOLDER,
this.guid,
this.parentGUID,
this.position,
this.title,
null,
this.children?.map(BookmarkTreeNode::asBookmarkNode)
)
}
is BookmarkSeparator -> {
BookmarkNode(BookmarkNodeType.SEPARATOR, this.guid, this.parentGUID, this.position, null, null, null)
BookmarkNode(
BookmarkNodeType.SEPARATOR,
this.guid,
this.parentGUID,
this.position,
null,
null,
null
)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import mozilla.appservices.places.PlacesException
import mozilla.appservices.places.PlacesReaderConnection
import mozilla.appservices.places.PlacesWriterConnection
import mozilla.components.concept.storage.BookmarkNode
import mozilla.components.concept.storage.FrecencyThresholdOption
import mozilla.components.concept.storage.PageObservation
import mozilla.components.concept.storage.PageVisit
import mozilla.components.concept.storage.RedirectSource
Expand Down Expand Up @@ -156,29 +157,48 @@ class PlacesHistoryStorageTest {
history.recordVisit(url, PageVisit(VisitType.LINK, RedirectSource.NOT_A_SOURCE))
}

var infos = history.getTopFrecentSites(0)
var infos = history.getTopFrecentSites(0, frecencyThreshold = FrecencyThresholdOption.NONE)
assertEquals(0, infos.size)

infos = history.getTopFrecentSites(3)
infos = history.getTopFrecentSites(0, frecencyThreshold = FrecencyThresholdOption.SKIP_ONE_TIME_PAGES)
assertEquals(0, infos.size)

infos = history.getTopFrecentSites(3, frecencyThreshold = FrecencyThresholdOption.NONE)
assertEquals(3, infos.size)
assertEquals("https://www.mozilla.com/foo/bar/baz", infos[0].url)
assertEquals("https://www.example.com/123", infos[1].url)
assertEquals("https://news.ycombinator.com/", infos[2].url)

infos = history.getTopFrecentSites(3, frecencyThreshold = FrecencyThresholdOption.SKIP_ONE_TIME_PAGES)
assertEquals(2, infos.size)
assertEquals("https://www.mozilla.com/foo/bar/baz", infos[0].url)
assertEquals("https://www.example.com/123", infos[1].url)

infos = history.getTopFrecentSites(5)
infos = history.getTopFrecentSites(5, frecencyThreshold = FrecencyThresholdOption.NONE)
assertEquals(5, infos.size)
assertEquals("https://www.mozilla.com/foo/bar/baz", infos[0].url)
assertEquals("https://www.example.com/123", infos[1].url)
assertEquals("https://news.ycombinator.com/", infos[2].url)
assertEquals("https://mozilla.com/a1/b2/c3", infos[3].url)
assertEquals("https://www.example.com/12345", infos[4].url)

infos = history.getTopFrecentSites(100)
infos = history.getTopFrecentSites(5, frecencyThreshold = FrecencyThresholdOption.SKIP_ONE_TIME_PAGES)
assertEquals(2, infos.size)
assertEquals("https://www.mozilla.com/foo/bar/baz", infos[0].url)
assertEquals("https://www.example.com/123", infos[1].url)

infos = history.getTopFrecentSites(100, frecencyThreshold = FrecencyThresholdOption.NONE)
assertEquals(5, infos.size)
assertEquals("https://www.mozilla.com/foo/bar/baz", infos[0].url)
assertEquals("https://www.example.com/123", infos[1].url)
assertEquals("https://news.ycombinator.com/", infos[2].url)
assertEquals("https://mozilla.com/a1/b2/c3", infos[3].url)
assertEquals("https://www.example.com/12345", infos[4].url)

infos = history.getTopFrecentSites(100, frecencyThreshold = FrecencyThresholdOption.SKIP_ONE_TIME_PAGES)
assertEquals(2, infos.size)
assertEquals("https://www.mozilla.com/foo/bar/baz", infos[0].url)
assertEquals("https://www.example.com/123", infos[1].url)
}

@Test
Expand Down
Loading

0 comments on commit 7b76a38

Please sign in to comment.