Skip to content

Commit

Permalink
odnoklassniki implemented (#322)
Browse files Browse the repository at this point in the history
Co-authored-by: Mikhael Sokolov <mikhael.sokolov@mynd.co>
  • Loading branch information
sokomishalov and mikhael-sokolov-rs authored Sep 7, 2022
1 parent 8e3433e commit f5943de
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 19 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Current list of implemented sources:
- [Vimeo](https://vimeo.com)
- [IFunny](https://ifunny.co)
- [VK](https://vk.com)
- [Odnoklassniki](https://ok.ru)
- [Pikabu](https://pikabu.ru)

# Bugs
Expand Down Expand Up @@ -86,9 +87,10 @@ optional arguments:
positional arguments:
PROVIDER skraper provider, options: [facebook, instagram,
twitter, youtube, twitch, reddit, 9gag, pinterest,
flickr, tumblr, ifunny, vk, pikabu]
PROVIDER skraper provider, options: facebook, instagram,
twitter, youtube, tiktok, telegram, twitch, reddit,
9gag, pinterest, flickr, tumblr, ifunny, vk, pikabu,
vimeo, odnoklassniki
PATH path to user/community/channel/topic/trend
```
Expand Down Expand Up @@ -142,6 +144,7 @@ As mentioned before, the provider implementation list is:
- [VimeoSkraper](skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/vimeo/VimeoSkraper.kt)
- [IFunnySkraper](skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/ifunny/IFunnySkraper.kt)
- [VkSkraper](skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/vk/VkSkraper.kt)
- [OdnoklassnikiSkraper](skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/odnoklassniki/OdnoklassnikiSkraper.kt)
- [PikabuSkraper](skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/pikabu/PikabuSkraper.kt)
After that, usage is as simple as:
Expand Down
34 changes: 18 additions & 16 deletions skrapers/src/main/kotlin/ru/sokomishalov/skraper/Skrapers.kt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import ru.sokomishalov.skraper.provider.flickr.FlickrSkraper
import ru.sokomishalov.skraper.provider.ifunny.IFunnySkraper
import ru.sokomishalov.skraper.provider.instagram.InstagramSkraper
import ru.sokomishalov.skraper.provider.ninegag.NinegagSkraper
import ru.sokomishalov.skraper.provider.odnoklassniki.OdnoklassnikiSkraper
import ru.sokomishalov.skraper.provider.pikabu.PikabuSkraper
import ru.sokomishalov.skraper.provider.pinterest.PinterestSkraper
import ru.sokomishalov.skraper.provider.reddit.RedditSkraper
Expand Down Expand Up @@ -189,22 +190,23 @@ object Skrapers {
val spiSkrapers = spi<Skraper>()

val knownSkrapers = listOf(
FacebookSkraper(),
InstagramSkraper(),
TwitterSkraper(),
YoutubeSkraper(),
TikTokSkraper(),
TelegramSkraper(),
TwitchSkraper(),
RedditSkraper(),
NinegagSkraper(),
PinterestSkraper(),
FlickrSkraper(),
TumblrSkraper(),
IFunnySkraper(),
VkSkraper(),
PikabuSkraper(),
VimeoSkraper(),
FacebookSkraper(client),
InstagramSkraper(client),
TwitterSkraper(client),
YoutubeSkraper(client),
TikTokSkraper(client),
TelegramSkraper(client),
TwitchSkraper(client),
RedditSkraper(client),
NinegagSkraper(client),
PinterestSkraper(client),
FlickrSkraper(client),
TumblrSkraper(client),
IFunnySkraper(client),
VkSkraper(client),
PikabuSkraper(client),
VimeoSkraper(client),
OdnoklassnikiSkraper(client),
)

return spiSkrapers + knownSkrapers
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2019-present Mikhael Sokolov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ru.sokomishalov.skraper.provider.odnoklassniki

import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import org.jsoup.nodes.Document
import ru.sokomishalov.skraper.Skraper
import ru.sokomishalov.skraper.Skrapers
import ru.sokomishalov.skraper.client.HttpRequest
import ru.sokomishalov.skraper.client.SkraperClient
import ru.sokomishalov.skraper.client.fetchDocument
import ru.sokomishalov.skraper.client.fetchOpenGraphMedia
import ru.sokomishalov.skraper.internal.iterable.emitBatch
import ru.sokomishalov.skraper.internal.jsoup.getFirstElementByAttributeValue
import ru.sokomishalov.skraper.internal.jsoup.getFirstElementByClass
import ru.sokomishalov.skraper.internal.jsoup.getMetaPropertyMap
import ru.sokomishalov.skraper.internal.net.host
import ru.sokomishalov.skraper.model.*

/**
 * [Skraper] implementation for Odnoklassniki (https://ok.ru).
 *
 * Posts are read from the HTML of the requested page: every element carrying a
 * `data-feed-id` attribute is treated as one post. Page info is read from the
 * page's meta properties (`og:url`, `og:title`, `og:description`, `og:image`).
 *
 * @property client HTTP client used for every request; defaults to [Skrapers.client].
 */
open class OdnoklassnikiSkraper @JvmOverloads constructor(
    override val client: SkraperClient = Skrapers.client
) : Skraper {

    /**
     * Emits the posts found on the page at [path].
     *
     * The flow is empty when the page could not be fetched or contains no
     * elements with a `data-feed-id` attribute.
     *
     * @param path path relative to [BASE_URL]
     */
    override fun getPosts(path: String): Flow<Post> = flow {
        val document = getPage(path = path)

        val rawPosts = document
            ?.getElementsByAttribute("data-feed-id")
            ?.toList()
            .orEmpty()

        emitBatch(rawPosts) {
            // Reads the numeric counter of one of the post widgets ("like", "comment", "reshare").
            // Yields null when the widget is absent or its text is not a plain integer.
            fun widgetCount(type: String): Int? =
                getFirstElementByAttributeValue("data-widget-item-type", type)
                    ?.getFirstElementByClass("widget_count")
                    ?.html()
                    ?.trim()
                    ?.toIntOrNull()

            // Video cards: the numeric video id is the leading digit run after the last '_' of the
            // wrapper's element id. Cards without any digits are skipped instead of producing ".../video/".
            val videoPages = getElementsByClass("vid-card").mapNotNull { card ->
                card.getFirstElementByClass("vid-card_cnt_w")
                    ?.id()
                    ?.substringAfterLast("_")
                    ?.takeWhile { ch -> ch.isDigit() }
                    ?.takeIf { videoId -> videoId.isNotEmpty() }
                    ?.let { videoId -> "${BASE_URL}/video/${videoId}".toVideo() }
            }

            // Direct mp4 sources exposed through the `data-mp4src` attribute.
            val inlineVideos = getElementsByAttribute("data-mp4src").mapNotNull { element ->
                absoluteUrlOrNull(element.attr("data-mp4src"))?.toVideo()
            }

            // Images of the post collage.
            val collageImages = getElementsByClass("collage_img").mapNotNull { element ->
                absoluteUrlOrNull(element.attr("src"))?.toImage()
            }

            Post(
                id = attr("data-feed-id").orEmpty(),
                text = getFirstElementByClass("media-text_cnt")?.wholeText(),
                statistics = PostStatistics(
                    likes = widgetCount("like"),
                    comments = widgetCount("comment"),
                    reposts = widgetCount("reshare"),
                ),
                media = videoPages + inlineVideos + collageImages
            )
        }
    }

    /**
     * Builds [PageInfo] from the meta properties of the page at [path].
     *
     * @param path path relative to [BASE_URL]
     * @return page info, or `null` when the page could not be fetched
     */
    override suspend fun getPageInfo(path: String): PageInfo? {
        val meta = getPage(path = path)?.getMetaPropertyMap() ?: return null

        return PageInfo(
            nick = meta["og:url"]?.substringAfterLast("/"),
            // The title is cut at the first " |" separator.
            name = meta["og:title"].orEmpty().substringBefore(" |"),
            description = meta["og:description"].orEmpty(),
            avatar = meta["og:image"]?.toImage(),
        )
    }

    /**
     * Tells whether [url] points to Odnoklassniki.
     *
     * Only `ok.ru` itself and its subdomains are accepted; a plain substring check
     * would also accept unrelated hosts such as `facebook.ru`.
     */
    override fun supports(url: String): Boolean {
        val host = url.host
        return host.equals(HOST, ignoreCase = true) || host.endsWith(".$HOST", ignoreCase = true)
    }

    /** Resolves [media] through the Open Graph metadata of its page. */
    override suspend fun resolve(media: Media): Media {
        return client.fetchOpenGraphMedia(media)
    }

    /** Fetches and parses the page at [path] relative to [BASE_URL]. */
    private suspend fun getPage(path: String): Document? {
        return client.fetchDocument(
            request = HttpRequest(url = BASE_URL.buildFullURL(path = path)),
        )
    }

    /**
     * Turns an attribute value into an absolute URL.
     *
     * Blank values yield `null`, values that already carry an http(s) scheme are
     * returned untouched, anything else is prefixed with `https:`
     * (presumably the attributes hold protocol-relative `//host/...` links — confirm against live markup).
     */
    private fun absoluteUrlOrNull(raw: String): String? = when {
        raw.isBlank() -> null
        raw.startsWith("http://", ignoreCase = true) || raw.startsWith("https://", ignoreCase = true) -> raw
        else -> "https:${raw}"
    }

    companion object {
        const val BASE_URL: String = "https://ok.ru"

        /** Host name matched by [supports]. */
        private const val HOST: String = "ok.ru"
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright (c) 2019-present Mikhael Sokolov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ru.sokomishalov.skraper.provider.odnoklassniki

import kotlinx.coroutines.flow.Flow
import ru.sokomishalov.skraper.model.PageInfo
import ru.sokomishalov.skraper.model.Post

/**
 * Shortcut for [OdnoklassnikiSkraper.getPosts] that builds the path from a community name.
 *
 * @param community community identifier, used as the single path segment
 */
fun OdnoklassnikiSkraper.getCommunityPosts(community: String): Flow<Post> =
    getPosts(path = "/$community")

/**
 * Shortcut for [OdnoklassnikiSkraper.getPageInfo] that builds the path from a community name.
 *
 * @param community community identifier, used as the single path segment
 */
suspend fun OdnoklassnikiSkraper.getCommunityInfo(community: String): PageInfo? =
    getPageInfo(path = "/$community")
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (c) 2019-present Mikhael Sokolov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ru.sokomishalov.skraper.provider.odnoklassniki

import org.junit.jupiter.api.Test
import ru.sokomishalov.skraper.model.Image
import ru.sokomishalov.skraper.model.Video
import ru.sokomishalov.skraper.provider.SkraperTck

/**
 * Checks [OdnoklassnikiSkraper] against the shared [SkraperTck] assertions,
 * using the "milota" community as the sample page.
 */
class OdnoklassnikiSkraperTest : SkraperTck() {
    override val skraper: OdnoklassnikiSkraper = OdnoklassnikiSkraper(client = client)
    override val path: String = "/milota"
    private val community: String = "milota"

    // Sample video page shared by the resolving and downloading checks.
    private val videoUrl: String = "https://ok.ru/video/3944589167241"

    // Sample album image page used by the resolving check.
    private val imageUrl: String = "https://ok.ru/group/52234248454281/album/52234845225097/937540255625"

    @Test
    fun `Check community posts`() {
        assertPosts {
            skraper.getCommunityPosts(community = community)
        }
    }

    @Test
    fun `Check community page info`() {
        assertPageInfo {
            skraper.getCommunityInfo(community = community)
        }
    }

    @Test
    fun `Check media resolving`() {
        val video = Video(videoUrl)
        val image = Image(imageUrl)

        assertMediaResolved(video)
        assertMediaResolved(image)
    }

    @Test
    fun `Check media downloading`() {
        val video = Video(videoUrl)

        assertMediaDownloaded(video)
    }
}

0 comments on commit f5943de

Please sign in to comment.