Skip to content

Commit

Permalink
Use instance reqwest client instead of creating one for every new request (#15)
Browse files Browse the repository at this point in the history

* Clone structures to use same underlying reqwest client instead of recreating one every time.

* using Arcs to FilmowClient instead of cloning it

* no need to clone filmow client

---------

Co-authored-by: Lucas Meireles <lmeireles@palantir.com>
  • Loading branch information
LucasIME and Lucas Meireles authored Feb 16, 2024
1 parent a01afb1 commit 9d38a5e
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 42 deletions.
47 changes: 34 additions & 13 deletions src/clients/filmow_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,50 @@ use crate::fetchers::{
watched_list_fetcher::WatchedMoviesFetcher, watchlist_fetcher::WatchlistFetcher,
};

#[derive(Debug)]
pub struct FilmowClient {}
use reqwest::Client;

/// Client for fetching pages from Filmow.
///
/// Owns one `reqwest` `Client` that the instance methods use for their HTTP
/// requests, instead of building a new client for each request.
#[derive(Debug, Clone)]
pub struct FilmowClient {
// HTTP client stored on the instance; created once in `new()`.
client: Client,
}

impl FilmowClient {
pub async fn get_all_movies_from_watchlist(user: Arc<String>) -> Vec<Movie> {
WatchlistFetcher::get_all_movies_from_watchlist(user).await
/// Creates a `FilmowClient` backed by a freshly constructed `reqwest` client.
pub fn new() -> Self {
    let client = Client::new();
    Self { client }
}

pub async fn get_all_watched_movies(user: Arc<String>) -> Vec<Movie> {
WatchedMoviesFetcher::get_all_watched_movies(user).await
/// Fetches every movie on `user`'s watchlist.
///
/// Wraps the shared client in a `WatchlistFetcher` and delegates to
/// `WatchlistFetcher::get_all_movies_from_watchlist`.
///
/// `shared_self` is an owned `Arc` that is not used again here, so it is
/// moved into the fetcher instead of being cloned.
pub async fn get_all_movies_from_watchlist(
    shared_self: Arc<FilmowClient>,
    user: Arc<String>,
) -> Vec<Movie> {
    let watchlist_fetcher = Arc::new(WatchlistFetcher::new(shared_self));
    WatchlistFetcher::get_all_movies_from_watchlist(watchlist_fetcher, user).await
}

/// Fetches every movie `user` has marked as watched.
///
/// Wraps the shared client in a `WatchedMoviesFetcher` and delegates to
/// `WatchedMoviesFetcher::get_all_watched_movies`.
///
/// `shared_self` is an owned `Arc` that is not used again here, so it is
/// moved into the fetcher instead of being cloned.
pub async fn get_all_watched_movies(
    shared_self: Arc<FilmowClient>,
    user: Arc<String>,
) -> Vec<Movie> {
    let watched_list_fetcher = Arc::new(WatchedMoviesFetcher::new(shared_self));
    WatchedMoviesFetcher::get_all_watched_movies(watched_list_fetcher, user).await
}

/// Returns the Filmow base URL as an owned `String`.
pub fn get_base_url() -> String {
    String::from("https://filmow.com")
}

pub async fn get_html_from_url(url: &str) -> Result<String, String> {
pub async fn get_html_from_url(&self, url: &str) -> Result<String, String> {
let retry_strategy = ExponentialBackoff::from_millis(10).map(jitter).take(5);
Retry::spawn(retry_strategy, || async move {
Self::get_html_from_url_no_retry(url).await
self.get_html_from_url_no_retry(url).await
})
.await
}

async fn get_html_from_url_no_retry(url: &str) -> Result<String, String> {
match reqwest::get(url).await {
async fn get_html_from_url_no_retry(&self, url: &str) -> Result<String, String> {
match self.client.get(url).send().await {
Ok(resp) => {
if resp.status() == 404 {
return Err("404 page not found".to_string());
Expand All @@ -57,22 +75,25 @@ impl FilmowClient {
}
}

async fn get_movie_from_url(url: &str) -> Result<Movie, String> {
match FilmowClient::get_html_from_url(url).await {
async fn get_movie_from_url(&self, url: &str) -> Result<Movie, String> {
match self.get_html_from_url(url).await {
Ok(html_body) => MovieExtractor::extract_movie_from_html(html_body.as_str(), url),
Err(e) => Err(e),
}
}

pub async fn parallel_build_movie_from_preliminary_info(
shared_self: Arc<FilmowClient>,
info_vec: Vec<PreliminaryMovieInformation>,
) -> Vec<Movie> {
let mut children = vec![];

for info in info_vec {
let self_clone = shared_self.clone();
children.push(
tokio::spawn(async move {
println!("Fetching information for movie {}", info.movie_url);
match FilmowClient::get_movie_from_url(info.movie_url.as_str()).await {
match self_clone.get_movie_from_url(info.movie_url.as_str()).await {
Ok(movie) => {
println!("Successfully fetched information for Movie {}", movie.title);
Some(Movie {
Expand Down
49 changes: 38 additions & 11 deletions src/fetchers/watched_list_fetcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,36 @@ use crate::{
model::movie::Movie,
};

pub struct WatchedMoviesFetcher {}
/// Fetcher for a user's watched-movies pages.
///
/// Holds a shared (`Arc`) handle to the `FilmowClient` through which its
/// page requests are made.
#[derive(Clone)]
pub struct WatchedMoviesFetcher {
// Shared client handle; supplied by the caller through `new`.
filmow_client: Arc<FilmowClient>,
}

impl WatchedMoviesFetcher {
pub async fn get_all_watched_movies(user: Arc<String>) -> Vec<Movie> {
pub fn new(filmow_client: Arc<FilmowClient>) -> Self {
WatchedMoviesFetcher { filmow_client }
}

pub async fn get_all_watched_movies(
shared_self: Arc<WatchedMoviesFetcher>,
user: Arc<String>,
) -> Vec<Movie> {
println!("Fetching watched movies for user {}", user);

let number_of_pages = Self::get_last_watched_page_number(user.clone()).await;
let number_of_pages = shared_self.get_last_watched_page_number(user.clone()).await;
println!("Number of watched movies pages {:?}", number_of_pages);

let mut resp = vec![];
let mut handles = vec![];
for page_num in 1..=number_of_pages {
let page_movies_handle = tokio::spawn(Self::get_all_movies_for_watched_page(
page_num,
user.clone(),
));
let self_clone = shared_self.clone();
let user_clone = user.clone();

let page_movies_handle = tokio::spawn(async move {
self_clone
.get_all_movies_for_watched_page(page_num, user_clone)
.await
});
handles.push(page_movies_handle)
}

Expand All @@ -31,15 +45,24 @@ impl WatchedMoviesFetcher {
resp
}

pub async fn get_all_movies_for_watched_page(page_num: i32, user: Arc<String>) -> Vec<Movie> {
pub async fn get_all_movies_for_watched_page(
&self,
page_num: i32,
user: Arc<String>,
) -> Vec<Movie> {
let watched_url_for_page = Self::get_watched_url_for_page(user, page_num);
match FilmowClient::get_html_from_url(watched_url_for_page.as_str()).await {
match self
.filmow_client
.get_html_from_url(watched_url_for_page.as_str())
.await
{
Ok(watched_page_html) => {
let preliminary_movies_info =
MovieExtractor::get_preliminary_info_for_watched_movies(
watched_page_html.as_str(),
);
let page_movies = FilmowClient::parallel_build_movie_from_preliminary_info(
self.filmow_client.clone(),
preliminary_movies_info,
)
.await;
Expand All @@ -56,10 +79,14 @@ impl WatchedMoviesFetcher {
}
}

async fn get_last_watched_page_number(user: Arc<String>) -> i32 {
async fn get_last_watched_page_number(&self, user: Arc<String>) -> i32 {
println!("Getting total number of watched pages");
let watched_url = Self::get_watched_url_for_page(user, 1);
match FilmowClient::get_html_from_url(watched_url.as_str()).await {
match self
.filmow_client
.get_html_from_url(watched_url.as_str())
.await
{
Ok(watched_page_html) => {
MovieExtractor::get_last_page_from_html(watched_page_html.as_str()).unwrap_or(1)
}
Expand Down
52 changes: 41 additions & 11 deletions src/fetchers/watchlist_fetcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,39 @@ use crate::{
model::movie::Movie,
};

pub struct WatchlistFetcher {}
/// Fetcher for a user's watchlist pages.
///
/// Holds a shared (`Arc`) handle to the `FilmowClient` through which its
/// page requests are made.
#[derive(Clone)]
pub struct WatchlistFetcher {
// Shared client handle; supplied by the caller through `new`.
filmow_client: Arc<FilmowClient>,
}

impl WatchlistFetcher {
pub async fn get_all_movies_from_watchlist(user: Arc<String>) -> Vec<Movie> {
pub fn new(filmow_client: Arc<FilmowClient>) -> Self {
WatchlistFetcher { filmow_client }
}

pub async fn get_all_movies_from_watchlist(
shared_self: Arc<WatchlistFetcher>,
user: Arc<String>,
) -> Vec<Movie> {
println!("Fetching watchlist for user {}", user);

let number_of_pages = Self::get_last_watchlist_page_number(user.clone()).await;
let number_of_pages = shared_self
.get_last_watchlist_page_number(user.clone())
.await;
println!("Number of watchlist pages {:?}", number_of_pages);

let mut resp = vec![];
let mut handles = vec![];

for page_num in 1..=number_of_pages {
let page_movies_handle = tokio::spawn(Self::get_all_movies_for_watchlist_page(
page_num,
user.clone(),
));
let self_clone = shared_self.clone();
let user_clone = user.clone();

let page_movies_handle = tokio::spawn(async move {
self_clone
.get_all_movies_for_watchlist_page(page_num, user_clone)
.await
});
handles.push(page_movies_handle)
}

Expand All @@ -32,16 +49,25 @@ impl WatchlistFetcher {
resp
}

pub async fn get_all_movies_for_watchlist_page(page_num: i32, user: Arc<String>) -> Vec<Movie> {
pub async fn get_all_movies_for_watchlist_page(
&self,
page_num: i32,
user: Arc<String>,
) -> Vec<Movie> {
println!("Processing watched movies page {}", page_num);

let watchlist_url = Self::get_watchlist_url_for_page(user, page_num);
match FilmowClient::get_html_from_url(watchlist_url.as_str()).await {
match self
.filmow_client
.get_html_from_url(watchlist_url.as_str())
.await
{
Ok(watchlist_page_html) => {
let preliminary_movies_info = MovieExtractor::get_preliminary_info_for_watchlist(
watchlist_page_html.as_str(),
);
let page_movies = FilmowClient::parallel_build_movie_from_preliminary_info(
self.filmow_client.clone(),
preliminary_movies_info,
)
.await;
Expand All @@ -55,10 +81,14 @@ impl WatchlistFetcher {
}
}

async fn get_last_watchlist_page_number(user: Arc<String>) -> i32 {
async fn get_last_watchlist_page_number(&self, user: Arc<String>) -> i32 {
println!("Getting total number of watchlist pages");
let watchlist_url = Self::get_watchlist_url_for_page(user, 1);
match FilmowClient::get_html_from_url(watchlist_url.as_str()).await {
match self
.filmow_client
.get_html_from_url(watchlist_url.as_str())
.await
{
Ok(watchlist_page_html) => {
MovieExtractor::get_last_page_from_html(watchlist_page_html.as_str()).unwrap_or(1)
}
Expand Down
18 changes: 11 additions & 7 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@ fn get_username() -> String {

#[tokio::main]
async fn main() {
let filmow_client = Arc::new(FilmowClient::new());
let user = Arc::new(get_username());

let movies_handle = tokio::spawn(fetch_and_save_movies(user.clone()));
let watchlist_handle = tokio::spawn(fetch_and_save_watchlist(user.clone()));
let movies_handle = tokio::spawn(fetch_and_save_movies(filmow_client.clone(), user.clone()));
let watchlist_handle = tokio::spawn(fetch_and_save_watchlist(
filmow_client.clone(),
user.clone(),
));

movies_handle
.await
Expand All @@ -48,9 +52,9 @@ async fn main() {
);
}

async fn fetch_and_save_movies(user: Arc<String>) {
async fn fetch_and_save_movies(client: Arc<FilmowClient>, user: Arc<String>) {
let watched_movies_file_name = "watched.csv";
let mut watched_movies = FilmowClient::get_all_watched_movies(user).await;
let mut watched_movies = FilmowClient::get_all_watched_movies(client, user).await;
watched_movies.sort_by_key(|movie| movie.title.clone());

match CsvWriter::save_movies_to_csv(watched_movies, watched_movies_file_name) {
Expand All @@ -62,9 +66,9 @@ async fn fetch_and_save_movies(user: Arc<String>) {
}
}

async fn fetch_and_save_watchlist(user: Arc<String>) {
async fn fetch_and_save_watchlist(client: Arc<FilmowClient>, user: Arc<String>) {
let watchlist_file_name = "watchlist.csv";
let mut watchlist_movies = FilmowClient::get_all_movies_from_watchlist(user).await;
let mut watchlist_movies = FilmowClient::get_all_movies_from_watchlist(client, user).await;
watchlist_movies.sort_by_key(|movie| movie.title.clone());

match CsvWriter::save_movies_to_csv(watchlist_movies, watchlist_file_name) {
Expand All @@ -91,7 +95,7 @@ mod tests {
let stdout = std::str::from_utf8(&output.stdout).unwrap();
println!("stdout: {}", stdout);

let stderr= std::str::from_utf8(&output.stderr).unwrap();
let stderr = std::str::from_utf8(&output.stderr).unwrap();
println!("stderr: {}", stderr);

let expected_watchlist_content =
Expand Down

0 comments on commit 9d38a5e

Please sign in to comment.