Skip to content

Commit

Permalink
Add slide segments and extracted text to harvesting and DB, enable pa…
Browse files Browse the repository at this point in the history
…ella slide previews (#1163)

This adds the OCR'd slide texts as well as a list of timestamped frames
to the harvesting sync code and stores them in the DB.
In order to show the slide previews, `paella-slide-plugins` was added
and configured to use the timestamped frames.

Needs opencast/opencast#5757 to work. Once that
is merged, released and used on our test Opencast, the changes can be
tested with fresh uploads. We'll still need some mechanism to apply
segmentation and ocr (and speech-to-text as well) to existing videos.

(Can be reviewed commit by commit, though note that the migration from
the second commit was extended in the third)
  • Loading branch information
LukasKalbertodt authored Jun 3, 2024
2 parents 2f71e30 + 02ec7c7 commit 1563c80
Show file tree
Hide file tree
Showing 22 changed files with 238 additions and 61 deletions.
31 changes: 27 additions & 4 deletions backend/src/api/model/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ use crate::{
api::{
Context, Cursor, Id, Node, NodeValue,
common::NotAllowed,
err::{self, ApiResult, invalid_input},
model::{series::Series, realm::Realm, acl::{Acl, self}},
err::{self, invalid_input, ApiResult},
model::{acl::{self, Acl}, realm::Realm, series::Series},
},
db::{
types::{EventTrack, EventState, Key, ExtraMetadata, EventCaption},
types::{EventCaption, EventSegment, EventState, EventTrack, ExtraMetadata, Key},
util::{impl_from_db, select},
},
prelude::*,
Expand Down Expand Up @@ -52,6 +52,7 @@ pub(crate) struct SyncedEventData {
tracks: Vec<Track>,
thumbnail: Option<String>,
captions: Vec<Caption>,
segments: Vec<Segment>,
}

impl_from_db!(
Expand All @@ -61,7 +62,7 @@ impl_from_db!(
id, state, series, opencast_id, is_live,
title, description, duration, creators, thumbnail, metadata,
created, updated, start_time, end_time,
tracks, captions,
tracks, captions, segments,
read_roles, write_roles,
},
},
Expand Down Expand Up @@ -90,6 +91,10 @@ impl_from_db!(
.into_iter()
.map(Caption::from)
.collect(),
segments: row.segments::<Vec<EventSegment>>()
.into_iter()
.map(Segment::from)
.collect(),
}),
EventState::Waiting => None,
},
Expand All @@ -115,6 +120,12 @@ pub(crate) struct Caption {
lang: Option<String>,
}

// A single slide segment of an event as exposed through the GraphQL API
// (`type Segment { uri, startTime }` in the schema). Values are created from
// the DB type `EventSegment` via the `From<EventSegment>` impl in this file.
//
// NOTE: plain `//` comments are used on purpose. `///` doc comments on a
// juniper `GraphQLObject` are exported as descriptions into the GraphQL
// schema, which would change the generated `schema.graphql`.
#[derive(Debug, GraphQLObject)]
pub(crate) struct Segment {
// URI of the frame belonging to this segment.
uri: String,
// Start time of the segment. The DB stores this as a `bigint` documented as
// milliseconds; it is cast to `f64` and exposed as GraphQL `Float`.
start_time: f64,
}

impl Node for AuthorizedEvent {
fn id(&self) -> Id {
Id::event(self.key)
Expand Down Expand Up @@ -145,6 +156,9 @@ impl SyncedEventData {
fn captions(&self) -> &[Caption] {
&self.captions
}
// Slide segments of this event, in the order they are stored.
// (Kept as a `//` comment: `///` here would become a GraphQL field description.)
fn segments(&self) -> &[Segment] {
    self.segments.as_slice()
}
}

#[graphql_object(Context = Context, impl = NodeValue)]
Expand Down Expand Up @@ -505,6 +519,15 @@ impl From<EventCaption> for Caption {
}
}

impl From<EventSegment> for Segment {
fn from(src: EventSegment) -> Self {
Self {
uri: src.uri,
start_time: src.start_time as f64,
}
}
}

/// Defines the sort order for events.
#[derive(Debug, Clone, Copy, juniper::GraphQLInputObject)]
pub(crate) struct EventSortOrder {
Expand Down
2 changes: 1 addition & 1 deletion backend/src/api/model/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ macro_rules! handle_search_result {
// rebuilt yet. We also show "search unavailable" for this case.
Err(MsError::ParseError(e)) if e.is_data() => {
error!("Failed to deserialize search results (missing rebuild after update?): {e} \
(=> replying 'search uavailable')");
(=> replying 'search unavailable')");
return Ok(<$return_type>::SearchUnavailable(SearchUnavailable));
}

Expand Down
3 changes: 2 additions & 1 deletion backend/src/db/migrations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ pub(crate) async fn unsafe_overwrite_migrations(db: &mut Db) -> Result<()> {
}


// Helper macro to include migrations in the `migations` folder and add them to
// Helper macro to include migrations in the `migrations` folder and add them to
// a map. The `assert!` and `panic!` in there should ideally be compile errors,
// but panics are fine for now.
macro_rules! include_migrations {
Expand Down Expand Up @@ -365,4 +365,5 @@ static MIGRATIONS: Lazy<BTreeMap<u64, Migration>> = include_migrations![
30: "realm-permissions",
31: "series-metadata",
32: "custom-actions",
33: "event-slide-text-and-segments",
];
30 changes: 30 additions & 0 deletions backend/src/db/migrations/33-event-slide-text-and-segments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Adds two new fields to `events`:
-- `slide_text` which holds a url pointing to the extracted slide text
-- generated in Opencast,
-- and `segments`, which holds a list of frames with their respective
-- starting time, and is needed for slide previews in paella.
-- Also creates the appropriate type for the segments and adjusts
-- the constraints. Basically an adjusted copy of `14-event-captions`.

create type event_segment as (
uri text,
start_time bigint -- in ms
);

alter table events
add column slide_text text,
add column segments event_segment[]
default '{}'
constraint no_null_segment_items check (array_position(segments, null) is null);

alter table events
-- The default above was just for all existing records. New records should
-- require this to be set.
alter column segments drop default,
drop constraint ready_event_has_fields,
add constraint ready_event_has_fields check (state <> 'ready' or (
duration is not null
and tracks is not null and array_length(tracks, 1) > 0
and captions is not null
and segments is not null
));
4 changes: 2 additions & 2 deletions backend/src/db/tests/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl TestDb {
) -> Result<Key> {
let sql = "insert into events
(state, opencast_id, title, series, is_live, read_roles, write_roles, created,
updated, metadata, duration, tracks, captions)
updated, metadata, duration, tracks, captions, segments)
values
('ready', $1, $2, $3, false, '{ROLE_ANONYMOUS}', '{ROLE_ANONYMOUS}',
now(), now(), '{}', $4,
Expand All @@ -100,7 +100,7 @@ impl TestDb {
'{1280, 720}',
true
)]::event_track[],
'{}'
'{}', '{}'
)
returning id";

Expand Down
7 changes: 7 additions & 0 deletions backend/src/db/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ pub struct EventTrack {
pub resolution: Option<[i32; 2]>,
pub is_master: Option<bool>,
}
/// Represents the `event_segment` type defined in `33-event-slide-text-and-segments.sql`.
///
/// One element of the `segments` array column of `events`: a frame together
/// with the time at which it starts.
#[derive(Debug, FromSql, ToSql)]
#[postgres(name = "event_segment")]
pub struct EventSegment {
/// URI of the frame (`uri text` in the SQL type).
pub uri: String,
/// Start time in milliseconds (`start_time bigint` in the SQL type).
pub start_time: i64,
}

/// Represents the `event_caption` type defined in `14-event-captions.sql`.
#[derive(Debug, FromSql, ToSql)]
Expand Down
2 changes: 1 addition & 1 deletion backend/src/http/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ pub(crate) async fn serve(
}))
.await;
if let Err(e) = res {
warn!("Error serving connectiion: {e:#}");
warn!("Error serving connection: {e:#}");
}
});
}
Expand Down
9 changes: 7 additions & 2 deletions backend/src/sync/harvest/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use tokio_postgres::types::ToSql;
use crate::{
auth::ROLE_ADMIN,
config::Config,
db::{types::{EventTrack, EventState, SeriesState, EventCaption}, DbConnection},
db::{types::{EventCaption, EventSegment, EventState, EventTrack, SeriesState}, DbConnection},
prelude::*,
};
use super::{status::SyncStatus, OcClient};
Expand All @@ -25,7 +25,7 @@ const INITIAL_BACKOFF: Duration = Duration::from_secs(1);
const MAX_BACKOFF: Duration = Duration::from_secs(5 * 60);


/// Continuiously fetches from the harvesting API and writes new data into our
/// Continuously fetches from the harvesting API and writes new data into our
/// database.
pub(crate) async fn run(
daemon: bool,
Expand Down Expand Up @@ -162,6 +162,8 @@ async fn store_in_db(
is_live,
metadata,
updated,
segments,
slide_text,
} => {
let series_id = match &part_of {
None => None,
Expand All @@ -183,6 +185,7 @@ async fn store_in_db(

let tracks = tracks.into_iter().map(Into::into).collect::<Vec<EventTrack>>();
let captions = captions.into_iter().map(Into::into).collect::<Vec<EventCaption>>();
let segments = segments.into_iter().map(Into::into).collect::<Vec<EventSegment>>();

// We upsert the event data.
upsert(db, "events", "opencast_id", &[
Expand All @@ -206,6 +209,8 @@ async fn store_in_db(
("custom_action_roles", &acl.custom_actions),
("tracks", &tracks),
("captions", &captions),
("segments", &segments),
("slide_text", &slide_text),
]).await?;

trace!("Inserted or updated event {} ({})", opencast_id, title);
Expand Down
20 changes: 19 additions & 1 deletion backend/src/sync/harvest/response.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use crate::db::types::{CustomActions, EventCaption, EventTrack, ExtraMetadata};
use crate::db::types::{CustomActions, EventCaption, EventTrack, EventSegment, ExtraMetadata};


/// What the harvesting API returns.
Expand Down Expand Up @@ -41,6 +41,8 @@ pub(crate) enum HarvestItem {
end_time: Option<DateTime<Utc>>,
#[serde(with = "chrono::serde::ts_milliseconds")]
updated: DateTime<Utc>,
segments: Vec<Segment>,
slide_text: Option<String>,
},

#[serde(rename_all = "camelCase")]
Expand Down Expand Up @@ -128,6 +130,22 @@ impl Into<EventCaption> for Caption {
}
}

/// A slide segment as delivered by the harvesting API inside an event item.
///
/// Field names are deserialized in camelCase, i.e. `start_time` is read from
/// the key `startTime`. Converted into the DB type `EventSegment` before
/// being stored.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct Segment {
/// URI of the frame belonging to this segment.
uri: String,
/// Start time of the segment; passed on unchanged to the DB, whose column
/// type documents it as milliseconds.
start_time: i64
}

impl Into<EventSegment> for Segment {
fn into(self) -> EventSegment {
EventSegment {
uri: self.uri,
start_time: self.start_time,
}
}
}

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct Acl {
#[serde(default)]
Expand Down
9 changes: 9 additions & 0 deletions frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"paella-core": "1.48.1",
"paella-mp4multiquality-plugin": "1.47.1",
"paella-skins": "1.48.0",
"paella-slide-plugins": "^1.41.6",
"paella-user-tracking": "1.42.1",
"paella-zoom-plugin": "1.41.3",
"qrcode.react": "^3.1.0",
Expand Down
1 change: 1 addition & 0 deletions frontend/src/routes/Embed.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const query = graphql`
thumbnail
tracks { uri flavor mimetype resolution isMaster }
captions { uri lang }
segments { uri startTime }
}
}
}
Expand Down
1 change: 1 addition & 0 deletions frontend/src/routes/Video.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ const eventFragment = graphql`
endTime
tracks { uri flavor mimetype resolution isMaster }
captions { uri lang }
segments { uri startTime }
}
series {
id
Expand Down
48 changes: 27 additions & 21 deletions frontend/src/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -109,27 +109,6 @@ type EventConnection {
totalCount: Int!
}

"Some extra information we know about a role."
type RoleInfo {
"""
A user-facing label for this role (group or person). If the label does
not depend on the language (e.g. a name), `{ "_": "Peter" }` is
returned.
"""
label: TranslatedString!
"""
For user roles this is `null`. For groups, it defines a list of other
group roles that this role implies. I.e. a user with this role always
also has these other roles.
"""
implies: [String!]
"""
Is `true` if this role represents a large group. Used to warn users
accidentally giving write access to large groups.
"""
large: Boolean!
}

"A simple realm name: a fixed string."
type PlainRealmName {
name: String!
Expand Down Expand Up @@ -161,6 +140,27 @@ type Series {
events(order: EventSortOrder = {column: "CREATED", direction: "DESCENDING"}): [AuthorizedEvent!]!
}

"Some extra information we know about a role."
type RoleInfo {
"""
A user-facing label for this role (group or person). If the label does
not depend on the language (e.g. a name), `{ "_": "Peter" }` is
returned.
"""
label: TranslatedString!
"""
For user roles this is `null`. For groups, it defines a list of other
group roles that this role implies. I.e. a user with this role always
also has these other roles.
"""
implies: [String!]
"""
Is `true` if this role represents a large group. Used to warn users
accidentally giving write access to large groups.
"""
large: Boolean!
}

union EventSearchOutcome = SearchUnavailable | EventSearchResults

"""
Expand Down Expand Up @@ -189,6 +189,7 @@ type SyncedEventData implements Node {
tracks: [Track!]!
thumbnail: String
captions: [Caption!]!
segments: [Segment!]!
}

"A `Block`: a UI element that belongs to a realm."
Expand Down Expand Up @@ -713,6 +714,11 @@ input NewRealm {
pathSegment: String!
}

type Segment {
uri: String!
startTime: Float!
}

enum SortDirection {
ASCENDING
DESCENDING
Expand Down
Loading

0 comments on commit 1563c80

Please sign in to comment.