Simplify code by leveraging automatic extension detection
0xmichalis committed Jan 30, 2025
1 parent f1f7c64 commit 1fda654
Showing 5 changed files with 59 additions and 204 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -11,7 +11,7 @@ clippy:

.PHONY: test
test:
cargo test -- $(filter-out $@,$(MAKECMDGOALS))
cargo test

.PHONY: check
check:
91 changes: 14 additions & 77 deletions src/chain/evm.rs
@@ -11,7 +11,7 @@ use std::time::Duration;
use std::{future::Future, path::Path};
use tokio::fs;
use tokio::time::sleep;
use tracing::{debug, error, warn};
use tracing::{debug, error};

use crate::content::{
extensions::fetch_and_save_additional_content, fetch_and_save_content, Options,
@@ -22,25 +22,14 @@ pub struct NFTMetadata {
pub name: Option<String>,
pub description: Option<String>,
pub image: Option<String>,
#[serde(default)]
pub image_details: Option<Details>,
pub animation_url: Option<String>,
#[serde(default)]
pub animation_details: Option<Details>,
pub external_url: Option<String>,
pub attributes: Option<Vec<NFTAttribute>>,
pub media: Option<Media>,
pub content: Option<Media>,
pub assets: Option<Assets>,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Details {
Structured { format: String },
Raw(String),
}

#[derive(Debug, Serialize, Deserialize)]
pub struct NFTAttribute {
pub trait_type: String,
@@ -50,11 +39,6 @@ pub struct NFTAttribute {
#[derive(Debug, Serialize, Deserialize)]
pub struct Media {
pub uri: String,
pub dimensions: Option<String>,
pub size: Option<String>,
#[serde(rename = "mimeType")]
pub mime_type: Option<String>,
pub mime: Option<String>,
}

#[derive(Debug, Serialize, Deserialize)]
@@ -138,77 +122,35 @@ async fn get_token_uri(
Ok(uri)
}

fn get_extension_from_mime(mime: &str) -> Option<String> {
match mime.split('/').last() {
Some("gltf-binary") => Some("glb".to_string()),
Some("octet-stream") => None,
Some(ext) => Some(ext.to_string()),
None => None,
}
}

fn get_extension_from_media(media: &Media) -> Option<String> {
if let Some(mime_type) = &media.mime_type {
return get_extension_from_mime(mime_type);
}
if let Some(mime) = &media.mime {
return get_extension_from_mime(mime);
}
None
}

fn parse_details(details: &Details) -> String {
match details {
Details::Structured { format } => format.to_lowercase(),
// Ugly but apparently some metadata is not structured properly
// eg. AMC's Oración
Details::Raw(raw_string) => serde_json::from_str::<serde_json::Value>(raw_string)
.unwrap()
.get("format")
.unwrap()
.as_str()
.unwrap()
.to_string(),
}
}

fn get_uri_and_extension_from_media(media: &Media, fallback_uri: &str) -> (String, Option<String>) {
fn get_uri_from_media(media: &Media, fallback_uri: &str) -> String {
let mut uri = media.uri.to_string();
if uri.is_empty() {
uri = fallback_uri.to_string();
}
(uri, get_extension_from_media(media))
uri
}

fn get_uri_and_extension_from_metadata(
fn get_uri_from_metadata(
metadata: &NFTMetadata,
fallback_uri: &str,
check_image_details: bool,
check_animation_details: bool,
) -> (String, Option<String>) {
) -> String {
if !check_image_details && !check_animation_details {
panic!("Need to check the extension of either an image or animation");
}
if let Some(media) = &metadata.media {
return get_uri_and_extension_from_media(media, fallback_uri);
return get_uri_from_media(media, fallback_uri);
}
if let Some(content) = &metadata.content {
return get_uri_and_extension_from_media(content, fallback_uri);
}
if check_image_details && metadata.image_details.is_some() {
let format = parse_details(metadata.image_details.as_ref().unwrap());
return (fallback_uri.to_string(), Some(format));
}
if check_animation_details && metadata.animation_details.is_some() {
let format = parse_details(metadata.animation_details.as_ref().unwrap());
return (fallback_uri.to_string(), Some(format));
return get_uri_from_media(content, fallback_uri);
}
if let Some(assets) = &metadata.assets {
if let Some(glb) = &assets.glb {
return (glb.to_string(), Some("glb".to_string()));
return glb.to_string();
}
}
(fallback_uri.to_string(), None)
fallback_uri.to_string()
}

pub async fn process_nfts(
@@ -236,7 +178,7 @@ pub async fn process_nfts(
let contract_addr = match contract.address.parse::<Address>() {
Ok(addr) => addr,
Err(e) => {
warn!("Failed to parse contract address on {}: {}", chain_name, e);
error!("Failed to parse contract address on {}: {}", chain_name, e);
continue;
}
};
@@ -245,7 +187,7 @@
let token_id = match U256::from_str_radix(&contract.token_id, 10) {
Ok(id) => id,
Err(e) => {
warn!("Failed to parse token ID: {}", e);
error!("Failed to parse token ID: {}", e);
continue;
}
};
@@ -254,7 +196,7 @@
let token_uri = match get_token_uri(contract_addr, provider.clone(), token_id).await {
Ok(uri) => uri,
Err(e) => {
error!("Failed to get token URI: {}, skipping token", e);
error!("Failed to get token URI: {}", e);
continue;
}
};
@@ -272,7 +214,6 @@ pub async fn process_nfts(
Options {
overriden_filename: Some("metadata.json".to_string()),
fallback_filename: None,
fallback_extension: None,
},
)
.await
@@ -289,8 +230,7 @@ pub async fn process_nfts(

// Save linked content
if let Some(image_url) = &metadata.image {
let (image_url, extension) =
get_uri_and_extension_from_metadata(&metadata, image_url, true, false);
let image_url = get_uri_from_metadata(&metadata, image_url, true, false);
debug!("Downloading image from {}", image_url);
fetch_and_save_content(
&image_url,
Expand All @@ -301,15 +241,13 @@ pub async fn process_nfts(
Options {
overriden_filename: None,
fallback_filename: Some("image".to_string()),
fallback_extension: extension,
},
)
.await?;
}

if let Some(animation_url) = &metadata.animation_url {
let (animation_url, extension) =
get_uri_and_extension_from_metadata(&metadata, animation_url, false, true);
let animation_url = get_uri_from_metadata(&metadata, animation_url, false, true);
debug!("Downloading animation from {}", animation_url);
fetch_and_save_content(
&animation_url,
@@ -320,7 +258,6 @@ pub async fn process_nfts(
Options {
overriden_filename: None,
fallback_filename: Some("animation".to_string()),
fallback_extension: extension,
},
)
.await?;
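
A minimal sketch (not part of this commit) of how the simplified get_uri_from_media helper behaves. It assumes the tests sit in the same module as the private helper, and the example URIs are made up:

```rust
#[cfg(test)]
mod uri_tests {
    use super::{get_uri_from_media, Media};

    #[test]
    fn media_uri_wins_over_fallback() {
        // When the media entry carries its own URI, that URI is used as-is.
        let media = Media {
            uri: "ipfs://QmMediaUri".to_string(),
        };
        assert_eq!(
            get_uri_from_media(&media, "ipfs://fallback"),
            "ipfs://QmMediaUri"
        );
    }

    #[test]
    fn empty_media_uri_falls_back() {
        // An empty media URI falls back to the URI taken from the metadata field.
        let media = Media { uri: String::new() };
        assert_eq!(
            get_uri_from_media(&media, "ipfs://fallback"),
            "ipfs://fallback"
        );
    }
}
```
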
2 changes: 0 additions & 2 deletions src/chain/tezos.rs
@@ -127,7 +127,6 @@ pub async fn process_nfts(
Options {
overriden_filename: Some("metadata.json".to_string()),
fallback_filename: None,
fallback_extension: None,
},
);
let metadata_content_str = fs::read_to_string(metadata_content.await?).await?;
@@ -192,7 +191,6 @@ pub async fn process_nfts(
Options {
overriden_filename: Some(file_name),
fallback_filename: None,
fallback_extension: None,
},
)
.await?;
78 changes: 31 additions & 47 deletions src/content/mod.rs
@@ -1,15 +1,14 @@
use crate::url::{
get_data_url_content, get_data_url_mime_type, get_last_path_segment, get_url, is_data_url,
};
use crate::url::{get_data_url, get_last_path_segment, get_url, is_data_url};
use anyhow::Result;
use serde_json::Value;
use std::path::{Path, PathBuf};
use tokio::fs;
use tracing::{debug, info};

pub mod extensions;
pub mod html;

async fn fetch_http_content(url: &str) -> Result<(Vec<u8>, String)> {
async fn fetch_http_content(url: &str) -> Result<Vec<u8>> {
let client = reqwest::Client::new();
let response = client.get(url).send().await?;

@@ -22,22 +21,14 @@ async fn fetch_http_content(url: &str) -> Result<(Vec<u8>, String)> {
));
}

let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|h| h.to_str().ok())
.unwrap_or("")
.to_string();

let content = response.bytes().await?.to_vec();

Ok((content, content_type))
Ok(content)
}

pub struct Options {
pub overriden_filename: Option<String>,
pub fallback_filename: Option<String>,
pub fallback_extension: Option<String>,
}

async fn get_filename(
@@ -54,16 +45,10 @@
.join(token_id);

// Determine filename
let mut filename = if let Some(name) = options.overriden_filename {
let filename = if let Some(name) = options.overriden_filename {
name.to_string()
} else if is_data_url(url) {
// For data URLs, use content type as filename
let mime_type = get_data_url_mime_type(url);
format!(
"{}.{}",
options.fallback_filename.unwrap_or("content".to_string()),
mime_type
)
options.fallback_filename.unwrap_or("content".to_string())
} else {
// For regular URLs, try to extract filename from path
get_last_path_segment(
Expand All @@ -75,12 +60,6 @@ async fn get_filename(
)
};

if let Some(extension) = options.fallback_extension {
if !filename.contains('.') {
filename = format!("{}.{}", filename, extension);
}
}

let file_path = dir_path.join(&filename);

Ok(file_path)
@@ -101,6 +80,16 @@ fn detect_media_extension(content: &[u8]) -> Option<&'static str> {
[0x00, 0x00, 0x00, _, 0x66, 0x74, 0x79, 0x70, 0x6D, 0x70, 0x34, 0x32, ..] => Some("mp4"),
// QuickTime MOV
[0x00, 0x00, 0x00, 0x14, 0x66, 0x74, 0x79, 0x70, 0x71, 0x74, 0x20, 0x20, ..] => Some("mov"),
// HTML
[b'<', b'h', b't', b'm', b'l', ..] => Some("html"),
// HTML starting with <!DOCTYPE html
[0x3C, 0x21, 0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45, 0x20, 0x68, 0x74, 0x6D, 0x6C, ..] => {
Some("html")
}
// JSON
[b'{', ..] => Some("json"),
// GLB
[0x47, 0x4C, 0x42, 0x0D, 0x0A, 0x1A, 0x0A, ..] => Some("glb"),
_ => None,
}
}
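
The newly added signatures can be exercised with a small test sketch like the one below (not part of this commit). It assumes it lives in the same module as the private detect_media_extension function; the GLB case uses the exact byte prefix matched above:

```rust
#[cfg(test)]
mod sniff_tests {
    use super::detect_media_extension;

    #[test]
    fn detects_newly_added_signatures() {
        // HTML, with and without a doctype prefix
        assert_eq!(
            detect_media_extension(b"<html><body></body></html>"),
            Some("html")
        );
        assert_eq!(
            detect_media_extension(b"<!DOCTYPE html><html></html>"),
            Some("html")
        );
        // JSON objects start with '{'
        assert_eq!(detect_media_extension(br#"{"name":"token"}"#), Some("json"));
        // GLB, using the byte prefix matched in this commit
        let glb = [0x47, 0x4C, 0x42, 0x0D, 0x0A, 0x1A, 0x0A, 0x00];
        assert_eq!(detect_media_extension(&glb), Some("glb"));
        // Unrecognized content yields None, leaving the filename extension-less
        assert_eq!(detect_media_extension(b"plain text"), None);
    }
}
```
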
@@ -123,8 +112,8 @@ pub async fn fetch_and_save_content(
}

// Get content based on URL type
let (mut content, content_type) = if is_data_url(url) {
get_data_url_content(url)?
let mut content = if is_data_url(url) {
get_data_url(url).unwrap()
} else {
let content_url = get_url(url);
// TODO: Rotate IPFS gateways to handle rate limits
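
For context, the get_data_url(url) call above now returns only the decoded payload bytes; the real helper lives in the crate's url module (presumably src/url.rs), whose diff is not shown in this view. The following is a hypothetical sketch of its shape, assuming the base64 crate's 0.21+ Engine API, not the project's actual implementation:

```rust
use base64::{engine::general_purpose::STANDARD, Engine as _};

// Assumed shape: return the decoded payload bytes of a data: URL, or None if
// the URL is malformed. The caller above unwraps the result.
pub fn get_data_url(url: &str) -> Option<Vec<u8>> {
    // data:[<mediatype>][;base64],<data>
    let (header, payload) = url.strip_prefix("data:")?.split_once(',')?;
    if header.ends_with(";base64") {
        STANDARD.decode(payload).ok()
    } else {
        // Non-base64 payloads are returned as-is (percent-decoding omitted here)
        Some(payload.as_bytes().to_vec())
    }
}
```
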
@@ -134,31 +123,26 @@
// Create directory and save content
fs::create_dir_all(file_path.parent().unwrap()).await?;

if content_type.contains("text/html") || content_type.contains("application/xhtml") {
if !file_path.to_string_lossy().ends_with(".html") {
file_path = file_path.with_extension("html");
}
debug!("Downloading HTML content from {}. The saved files may be incomplete as they may have more dependencies.", url);
let content_str = String::from_utf8_lossy(&content);
html::download_html_resources(&content_str, url, file_path.parent().unwrap()).await?;
} else if content_type.contains("application/json") {
// Try to parse and format JSON content
if let Ok(content_str) = String::from_utf8(content.clone()) {
if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(&content_str) {
content = serde_json::to_string_pretty(&json_value)?.into();
}
}
}

// After the HTML/JSON handling block, add media extension detection:
// Check for media files if no extension detected
// Detect media extension if no extension is present
if file_path.extension().is_none() {
if let Some(ext) = detect_media_extension(&content) {
file_path = file_path.with_extension(ext);
debug!("Detected media extension: {}", ext);
}
}

match file_path.extension().unwrap_or_default().to_str() {
Some("json") => {
let json_value: Value = serde_json::from_slice(&content)?;
content = serde_json::to_string_pretty(&json_value)?.into();
}
Some("html") => {
let content_str = String::from_utf8_lossy(&content);
html::download_html_resources(&content_str, url, file_path.parent().unwrap()).await?;
}
_ => {}
}

// Check if file exists again before downloading
if fs::try_exists(&file_path).await? {
debug!("File already exists at {}", file_path.display());
(Diff for the fifth changed file was not loaded in this view.)
