Improve harvest error reporting (#1257)
Previously, when an item failed to deserialize, we logged this:

```
WARN  tobira::sync::harvest >  Could not deserialize item in harvest response for kind 'event' (updated 2024-10-02 09:01:49.038 UTC)
```

This always required manually finding the item with said `updated`
time and then writing some code to figure out what's actually wrong.

With this PR, Tobira logs an actually useful error together with the
whole raw value. See this example (I added a dummy required field
`title2` to cause an error):

```
ERROR tobira::sync::harvest >  Could not deserialize 'event' harvest item -> ignoring. Error: missing field `title2`
DEBUG tobira::sync::harvest >  Raw value: {
                            >    "acl": {
                            >      "read": [
                            >        "ROLE_ANONYMOUS"
                            >      ]
                            >    },
                            >    "captions": [],
                            >    "created": 1460505600000,
                            >    "creators": [
                            >      "Dissertori,  Günther"
                            >    ],
                            >    "description": null,
                            >    "duration": 5637803,
                            >    "endTime": 1460543400000,
                            >    "id": "09828f52-b684-32b3-a614-cbb96fc63ee8",
                            >    "isLive": false,
                            >    "kind": "event",
                            >    "metadata": {
                            >      "dcterms": {
                            >        "contributor": [
                            >          "D-PHYS"
                            >        ],
                            >        "issued": [
                            >          "2016-04-14"
                            >        ],
                            >        "language": [
                            >          "deu"
                            >        ],
                            >        "license": [
                            >          "CC-BY-NC-ND"
                            >        ],
                            >        "publisher": [
                            >          "ETH Zürich, ID-MMS; Schweiz"
                            >        ],
                            >        "rightsHolder": [
                            >          "ETH Zürich, Schweiz"
                            >        ],
                            >        "spatial": [
                            >          "Zürich, Schweiz; ETH Zürich, HPV G 5"
                            >        ],
                            >        "temporal": [
                            >          "start=2016-04-13T08:45:00Z; end=2016-04-13T10:30:00Z; scheme=W3C-DTF;"
                            >        ],
                            >        "type": [
                            >          "MovingImage"
                            >        ]
                            >      },
                            >      "http://ethz.ch/video/metadata": {
                            >        "owner": [
                            >          "Dissertori, Günther"
                            >        ]
                            >      }
                            >    },
                            >    "partOf": "97eea518-409b-372b-b47a-54483658fc0c",
                            >    "segments": [],
                            >    "slideText": null,
                            >    "startTime": 1460537100000,
                            >    "thumbnail": "https://tobira-test-oc.ethz.ch/static/mh_default_org/engage-player/09828f52-b684-32b3-a614-cbb96fc63ee8/0579ed69-56f2-4d4d-878d-d85210e8df30/presentation_0dd34669_9a79_4a1b_be52_216ba65f4688_3_000s_player_preview_16_9_all_to_16_9_640x360.jpg",
                            >    "timelinePreview": null,
                            >    "title": "Einführung in die Kern- und Teilchenphysik",
                            >    "tracks": [
                            >      {
                            >        "flavor": "presentation/delivery",
                            >        "isMaster": false,
                            >        "mimetype": "video/webm",
                            >        "resolution": [
                            >          640,
                            >          360
                            >        ],
                            >        "uri": "https://tobira-test-oc.ethz.ch/static/mh_default_org/engage-player/09828f52-b684-32b3-a614-cbb96fc63ee8/82d70840-8acc-41aa-9c0d-9236be3e4112/presentation_0dd34669_9a79_4a1b_be52_216ba65f4688.webm"
                            >      },
                            >      {
                            >        "flavor": "presentation/delivery",
                            >        "isMaster": false,
                            >        "mimetype": "video/mp4",
                            >        "resolution": [
                            >          1280,
                            >          720
                            >        ],
                            >        "uri": "https://tobira-test-oc.ethz.ch/static/mh_default_org/engage-player/09828f52-b684-32b3-a614-cbb96fc63ee8/5f782138-8194-4368-8543-26c284d2b168/presentation_0dd34669_9a79_4a1b_be52_216ba65f4688.mp4"
                            >      },
                            >      {
                            >        "flavor": "presentation/delivery",
                            >        "isMaster": false,
                            >        "mimetype": "video/webm",
                            >        "resolution": [
                            >          1280,
                            >          720
                            >        ],
                            >        "uri": "https://tobira-test-oc.ethz.ch/static/mh_default_org/engage-player/09828f52-b684-32b3-a614-cbb96fc63ee8/4b776ef3-f1bd-4fe9-be16-f828374cdd2a/presentation_0dd34669_9a79_4a1b_be52_216ba65f4688.webm"
                            >      },
                            >      {
                            >        "flavor": "presentation/delivery",
                            >        "isMaster": false,
                            >        "mimetype": "video/mp4",
                            >        "resolution": [
                            >          640,
                            >          360
                            >        ],
                            >        "uri": "https://tobira-test-oc.ethz.ch/static/mh_default_org/engage-player/09828f52-b684-32b3-a614-cbb96fc63ee8/ccab01b9-f1c6-45b0-a6a5-c4648c5d96dc/presentation_0dd34669_9a79_4a1b_be52_216ba65f4688.mp4"
                            >      }
                            >    ],
                            >    "updated": 1626161268818
                            >  }
```
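The enum change that makes this possible lives in the `response` module, which is not among the hunks shown below, so here is only a rough, hypothetical sketch of the pattern: a catch-all variant keeps the raw JSON around, and re-deserializing it against the concrete type recovers the precise serde error. The `Event` fields and the untagged layout are illustrative assumptions, not the actual Tobira types.

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct Event {
    id: String,
    title: String,
    updated: i64,
}

// Untagged enums try each variant in order; `serde_json::Value` always
// deserializes, so it acts as a fallback that keeps the raw JSON around.
#[derive(Deserialize)]
#[serde(untagged)]
enum HarvestItem {
    Event(Event),
    Unknown(serde_json::Value),
}

fn main() {
    // Missing the required `title` field, so the `Event` variant fails and
    // the fallback captures the raw JSON instead.
    let raw = r#"{ "kind": "event", "id": "abc", "updated": 123 }"#;
    match serde_json::from_str::<HarvestItem>(raw).unwrap() {
        HarvestItem::Event(e) => println!("parsed event {}", e.id),
        HarvestItem::Unknown(v) => {
            // Deserializing again against the concrete type recovers the
            // precise error, e.g. "missing field `title`".
            let err = serde_json::from_value::<Event>(v.clone()).unwrap_err();
            eprintln!("Could not deserialize 'event' harvest item -> ignoring. Error: {err}");
            eprintln!("Raw value: {}", serde_json::to_string_pretty(&v).unwrap());
        }
    }
}
```

The actual handler in the diff below does the same re-deserialization per `kind`, so unknown kinds can still be reported separately.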
owi92 authored Oct 8, 2024
2 parents b1be3c8 + ddb6450 commit 75da84d
Showing 3 changed files with 266 additions and 182 deletions.
206 changes: 103 additions & 103 deletions backend/src/sync/harvest/mod.rs
@@ -3,6 +3,7 @@ use std::{
time::{Duration, Instant},
};

use serde::de::DeserializeOwned;
use tokio_postgres::types::ToSql;

use crate::{
@@ -86,7 +87,7 @@

// Write received data into the database, updating the sync status if
// everything worked out alright.
let last_updated = harvest_data.items.last().map(|item| item.updated());
let last_updated = harvest_data.items.iter().rev().find_map(|item| item.updated());
let mut transaction = db.transaction().await?;
store_in_db(harvest_data.items, &sync_status, &mut transaction).await?;
SyncStatus::update_harvested_until(harvest_data.includes_items_until, &*transaction).await?;
@@ -145,33 +146,14 @@ async fn store_in_db(
// Make sure we haven't received this update yet. The code below can
// handle duplicate items alright, but this way we can save on some DB
// accesses and the logged statistics are more correct.
if item.updated() < sync_status.harvested_until {
if item.updated().is_some_and(|updated| updated < sync_status.harvested_until) {
debug!("Skipping item which `updated` value is earlier than `harvested_until`");
continue;
}

match item {
HarvestItem::Event {
id: opencast_id,
title,
description,
part_of,
tracks,
captions,
created,
start_time,
end_time,
creators,
duration,
thumbnail,
mut acl,
is_live,
metadata,
updated,
segments,
slide_text,
} => {
let series_id = match &part_of {
HarvestItem::Event(mut event) => {
let series_id = match &event.part_of {
None => None,
Some(part_of) => {
db.query_opt("select id from series where opencast_id = $1", &[part_of])
@@ -182,44 +164,44 @@

// We always handle the admin role in a special way, so no need
// to store it for every single event.
acl.read.retain(|role| role != ROLE_ADMIN);
acl.write.retain(|role| role != ROLE_ADMIN);
event.acl.read.retain(|role| role != ROLE_ADMIN);
event.acl.write.retain(|role| role != ROLE_ADMIN);

for (_, roles) in &mut acl.custom_actions.0 {
for (_, roles) in &mut event.acl.custom_actions.0 {
roles.retain(|role| role != ROLE_ADMIN);
}

let tracks = tracks.into_iter().map(Into::into).collect::<Vec<EventTrack>>();
let captions = captions.into_iter().map(Into::into).collect::<Vec<EventCaption>>();
let segments = segments.into_iter().map(Into::into).collect::<Vec<EventSegment>>();
let tracks = event.tracks.into_iter().map(Into::into).collect::<Vec<EventTrack>>();
let captions = event.captions.into_iter().map(Into::into).collect::<Vec<EventCaption>>();
let segments = event.segments.into_iter().map(Into::into).collect::<Vec<EventSegment>>();

// We upsert the event data.
upsert(db, "all_events", "opencast_id", &[
("opencast_id", &opencast_id),
("opencast_id", &event.id),
("state", &EventState::Ready),
("series", &series_id),
("part_of", &part_of),
("is_live", &is_live),
("title", &title),
("description", &description),
("duration", &duration),
("created", &created),
("start_time", &start_time),
("end_time", &end_time),
("updated", &updated),
("creators", &creators),
("thumbnail", &thumbnail),
("metadata", &metadata),
("read_roles", &acl.read),
("write_roles", &acl.write),
("custom_action_roles", &acl.custom_actions),
("part_of", &event.part_of),
("is_live", &event.is_live),
("title", &event.title),
("description", &event.description),
("duration", &event.duration),
("created", &event.created),
("start_time", &event.start_time),
("end_time", &event.end_time),
("updated", &event.updated),
("creators", &event.creators),
("thumbnail", &event.thumbnail),
("metadata", &event.metadata),
("read_roles", &event.acl.read),
("write_roles", &event.acl.write),
("custom_action_roles", &event.acl.custom_actions),
("tracks", &tracks),
("captions", &captions),
("segments", &segments),
("slide_text", &slide_text),
("slide_text", &event.slide_text),
]).await?;

trace!("Inserted or updated event {} ({})", opencast_id, title);
trace!("Inserted or updated event {} ({})", event.id, event.title);
upserted_events += 1;
}

@@ -231,41 +213,33 @@ async fn store_in_db(
removed_events += 1;
}

HarvestItem::Series {
id: opencast_id,
title,
description,
updated,
acl,
created,
metadata
} => {
HarvestItem::Series(series) => {
// We first simply upsert the series.
let new_id = upsert(db, "series", "opencast_id", &[
("opencast_id", &opencast_id),
("opencast_id", &series.id),
("state", &SeriesState::Ready),
("title", &title),
("description", &description),
("read_roles", &acl.read),
("write_roles", &acl.write),
("updated", &updated),
("created", &created),
("metadata", &metadata),
("title", &series.title),
("description", &series.description),
("read_roles", &series.acl.read),
("write_roles", &series.acl.write),
("updated", &series.updated),
("created", &series.created),
("metadata", &series.metadata),
]).await?;

// But now we have to fix the foreign key for any events that
// previously referenced this series (via the Opencast UUID)
// but did not have the correct foreign key yet.
let query = "update events set series = $1 where part_of = $2 and series <> $1";
let updated_events = db.execute(query, &[&new_id, &opencast_id]).await?;
let updated_events = db.execute(query, &[&new_id, &series.id]).await?;

trace!("Inserted or updated series {} ({})", opencast_id, title);
trace!("Inserted or updated series {} ({})", series.id, series.title);
if updated_events != 0 {
debug!(
"Fixed foreign series key of {} event(s) after upserting series {} ({})",
updated_events,
opencast_id,
title,
series.id,
series.title,
);
}
upserted_series += 1;
@@ -283,16 +257,8 @@ async fn store_in_db(
removed_series += 1;
}

HarvestItem::Playlist {
id: opencast_id,
title,
description,
creator,
acl,
entries,
updated,
} => {
let entries = entries.into_iter().filter_map(|e| {
HarvestItem::Playlist(playlist) => {
let entries = playlist.entries.into_iter().filter_map(|e| {
// We do not store entries that we don't know, meaning that
// a resync is required as soon as Tobira learns about
// these new entries. But that's fine as that's likely
Expand All @@ -310,17 +276,17 @@ async fn store_in_db(
}).collect::<Vec<_>>();

upsert(db, "playlists", "opencast_id", &[
("opencast_id", &opencast_id),
("title", &title),
("description", &description),
("creator", &creator),
("read_roles", &acl.read),
("write_roles", &acl.write),
("opencast_id", &playlist.id),
("title", &playlist.title),
("description", &playlist.description),
("creator", &playlist.creator),
("read_roles", &playlist.acl.read),
("write_roles", &playlist.acl.write),
("entries", &entries),
("updated", &updated),
("updated", &playlist.updated),
]).await?;

trace!(opencast_id, title, "Inserted or updated playlist");
trace!(playlist.id, playlist.title, "Inserted or updated playlist");
upserted_playlists += 1;
}

@@ -332,23 +298,57 @@ async fn store_in_db(
removed_playlists += 1;
}

HarvestItem::Unknown { kind, updated } => {
let known = [
"event",
"event-deleted",
"series",
"series-deleted",
"playlist",
"playlist-deleted",
];

if known.contains(&&*kind) {
warn!("Could not deserialize item in harvest response for \
kind '{kind}' (updated {updated})");
} else {
warn!("Unknown item of kind '{kind}' in harvest response. \
You might need to update Tobira.");
}
HarvestItem::Unknown(raw) => {
(|| {
let Some(obj) = raw.as_object() else {
warn!(?raw, "Unexpected JSON value in harvest response -> ignoring");
return;
};

let Some(kind) = obj.get("kind") else {
warn!(?obj, "Item without 'kind' field in harvest response -> ignoring");
return;
};

let Some(kind) = kind.as_str() else {
warn!(?kind, "'kind' field has unexpected non-string type in \
harvest response -> ignoring");
return;
};

// This tries to deserialize the item again from the `raw`
// JSON in order to print a useful error message.
fn log_deserialize_error<T: DeserializeOwned>(
kind: &str,
v: serde_json::Value,
) {
let err = serde_json::from_value::<T>(v.clone())
.err()
.map(|e| e.to_string())
// This shouldn't happen, at all. But I rather print
// a stupid message than crashing the process.
.unwrap_or("failed to fail deserializing... wat".into());

error!("Could not deserialize '{kind}' harvest item -> ignoring. \
Error: {err}");
debug!("Raw value: {}", serde_json::to_string_pretty(&v).unwrap_or("ERR".into()));
}

match kind {
"event" => log_deserialize_error::<response::Event>("event", raw),
"series" => log_deserialize_error::<response::Series>("series", raw),
"playlist" => log_deserialize_error::<response::Playlist>("playlist", raw),
"event-deleted" | "series-deleted" | "playlist-deleted" => {
warn!("Could not deserialize item in harvest response for \
kind '{kind}'");
}
_ => {
warn!("Unknown item of kind '{kind}' in harvest response. \
You might need to update Tobira.");
}
}
})();

}
}
}
39 changes: 39 additions & 0 deletions backend/src/sync/harvest/playlist-response-oldest.json
@@ -0,0 +1,39 @@
{
"includesItemsUntil": 1727883891896,
"hasMore": false,
"items":
[
{
"creator": "Opencast",
"entries": [
{
"contentId": "ID-about-opencast",
"id": 1702,
"type": "E"
},
{
"contentId": "ID-3d-print",
"id": 1703,
"type": "E"
}
],
"kind": "playlist",
"description": "This is a playlist about Opencast",
"id": "1494cd19-cc43-4a2b-af29-b41d98d4e0d9",
"acl": {
"read": [
"ROLE_USER_BOB"
]
},
"title": "Opencast Playlist",
"updated": 1727884054447
},
{
"kind": "playlist-deleted",
"id": "eec06048-703d-40b1-a058-478f8bfc13f4",
"updated": 1727884054247
}
]
}


