@@ -15,12 +15,13 @@ function CardMeta({wordCount, pictureAvailable, source, versioncreated, displayD
{!isEmpty(audio) &&
{!isEmpty(video) &&
{displayDivider &&
diff --git a/assets/images.js b/assets/images.js
new file mode 100644
index 000000000..327d09176
--- /dev/null
+++ b/assets/images.js
@@ -0,0 +1 @@
+import './images/poster_default.jpg';
diff --git a/assets/images/poster_default.jpg b/assets/images/poster_default.jpg
new file mode 100644
index 000000000..c4e4e7bdd
Binary files /dev/null and b/assets/images/poster_default.jpg differ
diff --git a/assets/server.js b/assets/server.js
index 68b6dab53..f5d690710 100644
--- a/assets/server.js
+++ b/assets/server.js
@@ -29,6 +29,12 @@ class Server {
* @param {String} url
* @return {Promise}
+    /**
+     * Request a CSRF token from the companies endpoint.
+     *
+     * @return {Promise} resolves with the `csrf_token` field of the response
+     */
+    getCsrfToken() {
+        const pickToken = (response) => response.csrf_token;
+
+        return this.get('/companies/get-csrf-token').then(pickToken);
+    }
get(url) {
return fetch(url, options({}))
@@ -63,6 +69,19 @@ class Server {
+    /**
+     * POST `data` as JSON to `url`, sending a freshly fetched CSRF token
+     * in the `X-CSRF-Token` header.
+     *
+     * @param {String} url
+     * @param {Object} data
+     * @return {Promise}
+     */
+    postWithCsrfToken(url, data) {
+        const sendWithToken = (csrfToken) => {
+            const requestOptions = options({
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'X-CSRF-Token': csrfToken
+                },
+                body: JSON.stringify(data),
+            });
+
+            return fetch(url, requestOptions).then(checkStatus);
+        };
+
+        return this.getCsrfToken().then(sendWithToken);
+    }
* Make POST request to url in keeps the format of the input
diff --git a/assets/ui/components/ArticleBodyHtml.jsx b/assets/ui/components/ArticleBodyHtml.jsx
index 761801a9d..448fe0b56 100644
--- a/assets/ui/components/ArticleBodyHtml.jsx
+++ b/assets/ui/components/ArticleBodyHtml.jsx
@@ -4,62 +4,83 @@ import {get, memoize} from 'lodash';
import {formatHTML} from 'utils';
import {connect} from 'react-redux';
import {selectCopy} from '../../wire/actions';
+import DOMPurify from 'dompurify';
+const fallbackDefault = '/static/poster_default.jpg';
- * using component to fix iframely loading
- * https://iframely.com/docs/reactjs
- */
class ArticleBodyHtml extends React.PureComponent {
constructor(props) {
+ this.state = {
+ sanitizedHtml: '',
+ };
this.copyClicked = this.copyClicked.bind(this);
this.clickClicked = this.clickClicked.bind(this);
- // use memoize so this function is only called when `body_html` changes
+ this.preventContextMenu = this.preventContextMenu.bind(this);
this.getBodyHTML = memoize(this._getBodyHTML.bind(this));
this.bodyRef = React.createRef();
+ this.players = new Map();
componentDidMount() {
+ this.updateSanitizedHtml();
+ this.setupPlyrPlayers();
document.addEventListener('copy', this.copyClicked);
document.addEventListener('click', this.clickClicked);
+ this.addContextMenuEventListeners();
- clickClicked(event) {
- if (event != null) {
- const target = event.target;
- if (target && target.tagName === 'A' && this.isLinkExternal(target.href)) {
- event.preventDefault();
- event.stopPropagation();
- // security https://mathiasbynens.github.io/rel-noopener/
- var nextWindow = window.open();
- nextWindow.opener = null;
- nextWindow.location.href = target.href;
- }
+    /**
+     * Re-sanitize the body when the item changes, then post-process the DOM
+     * once the new markup has actually been rendered.
+     *
+     * `updateSanitizedHtml` only schedules a state update, so the players,
+     * embeds and listeners must be set up on the following update (when
+     * `sanitizedHtml` differs from the previous state) rather than right
+     * after the `setState` call, where the old (or no) markup is still mounted.
+     * This also covers the first mount, whose initial render is empty.
+     *
+     * @param {Object} prevProps
+     * @param {Object} prevState
+     */
+    componentDidUpdate(prevProps, prevState) {
+        if (prevProps.item !== this.props.item) {
+            this.updateSanitizedHtml();
+        }
+
+        // render() returns null for an empty body, so there may be no node to process
+        if (prevState.sanitizedHtml !== this.state.sanitizedHtml && this.bodyRef.current != null) {
+            this.loadIframely();
+            this.executeScripts();
+            this.setupPlyrPlayers();
+            this.addContextMenuEventListeners();
- isLinkExternal(href) {
- try {
- const url = new URL(href);
+ componentWillUnmount() {
+ document.removeEventListener('copy', this.copyClicked);
+ document.removeEventListener('click', this.clickClicked);
+ this.removeContextMenuEventListeners();
- // Check if the hosts are different and protocol is http or https
- return url.host !== window.location.host && ['http:', 'https:'].includes(url.protocol);
- } catch (e) {
- // will throw if string is not a valid link
- return false;
- }
+ this.players.forEach(player => player.destroy());
+ this.players.clear();
- componentDidUpdate() {
- this.loadIframely();
- this.executeScripts();
+    /**
+     * Pick the body markup for the current item (the first search highlight
+     * when one exists, otherwise `body_html`), run it through `getBodyHTML`
+     * and hand the result to `sanitizeHtml`.
+     */
+    updateSanitizedHtml() {
+        const {item} = this.props;
+        const hasHighlight = get(item, 'es_highlight.body_html.length', 0) > 0;
+        const rawHtml = hasHighlight ? item.es_highlight.body_html[0] : item.body_html;
+
+        this.sanitizeHtml(this.getBodyHTML(rawHtml));
+    }
+    /**
+     * Sanitize `html` with DOMPurify and store the result in
+     * `state.sanitizedHtml`. Empty input resets the state to an empty string.
+     *
+     * @param {String} html
+     */
+    sanitizeHtml(html) {
+        if (!html) {
+            this.setState({sanitizedHtml: ''});
+            return;
+        }
+
+        // NOTE(review): whitelisting `script` lets embedded scripts through the
+        // sanitizer; confirm body_html only ever comes from a trusted source.
+        const sanitizedHtml = DOMPurify.sanitize(html, {
+            ADD_TAGS: ['iframe', 'video', 'audio', 'figure', 'figcaption', 'script', 'twitter-widget', 'fb:like',
+                'blockquote', 'div'],
+            // attributes needed by the embeds and the media player markup
+            ADD_ATTR: [
+                'allow', 'allowfullscreen', 'frameborder', 'scrolling', 'src', 'width', 'height',
+                'data-plyr-config', 'data-plyr', 'aria-label', 'aria-hidden', 'focusable',
+                'class', 'role', 'tabindex', 'controls', 'download', 'target',
+                'async', 'defer', 'data-tweet-id', 'data-href',
+                'data-instgrm-captioned', 'data-instgrm-permalink',
+                'data-flourish-embed', 'data-src'
+            ],
+        });
+
+        this.setState({sanitizedHtml});
loadIframely() {
@@ -70,6 +91,7 @@ class ArticleBodyHtml extends React.PureComponent {
executeScripts() {
const tree = this.bodyRef.current;
const loaded = [];
@@ -78,10 +100,6 @@ class ArticleBodyHtml extends React.PureComponent {
- if (window.Plyr != null) {
- window.Plyr.setup('.js-player');
- }
tree.querySelectorAll('script').forEach((s) => {
if (s.hasAttribute('src') && !loaded.includes(s.getAttribute('src'))) {
let url = s.getAttribute('src');
@@ -117,8 +135,9 @@ class ArticleBodyHtml extends React.PureComponent {
- script.onerrror = (error) => {
- throw new URIError('The script ' + error.target.src + 'didn\'t load.');
+ script.onerror = (error) => {
+ console.error('Script load error:', error);
+ throw new URIError('The script ' + error.target.src + ' didn\'t load.');
@@ -126,13 +145,130 @@ class ArticleBodyHtml extends React.PureComponent {
- copyClicked() {
- this.props.reportCopy(this.props.item);
+ /**
+ * Create a Plyr instance for every `.js-player` element in the rendered body
+ * that does not have one yet, and remember it in `this.players`.
+ */
+ setupPlyrPlayers() {
+ const tree = this.bodyRef.current;
+ // nothing to do until the body is mounted and the Plyr global is available
+ if (tree == null || window.Plyr == null) {
+ return;
+ }
+ tree.querySelectorAll('.js-player:not(.plyr--setup)').forEach(element => {
+ // `this.players` is keyed by element, so an element is only wrapped once
+ if (!this.players.has(element)) {
+ const player = new window.Plyr(element, {
+ seekTime: 1,
+ keyboard: {focused: true, global: true},
+ tooltips: {controls: true, seek: true},
+ captions: {active: true, language: 'auto', update: true}
+ });
+ this.players.set(element, player);
+ // poster handling is driven by the element's `src` attribute
+ this.checkVideoLoading(player, element.getAttribute('src'));
+ this.setupMovePlayback(player);
+ }
+ });
- componentWillUnmount() {
- document.removeEventListener('copy', this.copyClicked);
- document.removeEventListener('click', this.clickClicked);
+    /**
+     * Let the user seek by pressing the mouse on the player's progress bar
+     * and dragging. Playback is paused while dragging and resumed on release
+     * if it was running before.
+     *
+     * The `mousemove`/`mouseup` listeners on `document` are attached only for
+     * the duration of a drag, so no document-level listeners outlive the
+     * player once it is destroyed.
+     *
+     * @param {Object} player - Plyr instance
+     */
+    setupMovePlayback(player) {
+        const container = player.elements.container;
+        let isScrubbing = false;
+        let wasPaused = false;
+
+        const scrub = (event) => {
+            if (!isScrubbing) {
+                return;
+            }
+
+            const rect = player.elements.progress.getBoundingClientRect();
+
+            // a hidden or collapsed progress bar would produce a NaN position
+            if (rect.width <= 0) {
+                return;
+            }
+
+            const percent = Math.min(Math.max((event.clientX - rect.left) / rect.width, 0), 1);
+
+            player.currentTime = percent * player.duration;
+        };
+
+        const stopScrubbing = () => {
+            document.removeEventListener('mousemove', scrub);
+            document.removeEventListener('mouseup', stopScrubbing);
+
+            if (!isScrubbing) {
+                return;
+            }
+
+            isScrubbing = false;
+            if (!wasPaused) {
+                player.play();
+            }
+        };
+
+        const startScrubbing = (event) => {
+            if (!event.target.closest('.plyr__progress')) {
+                return;
+            }
+
+            isScrubbing = true;
+            wasPaused = player.paused;
+            player.pause();
+            document.addEventListener('mousemove', scrub);
+            document.addEventListener('mouseup', stopScrubbing);
+            scrub(event);
+        };
+
+        container.addEventListener('mousedown', startScrubbing);
+    }
+    /**
+     * Decide whether a locally served video (`/assets/...`) needs the
+     * fallback poster.
+     *
+     * After `loadeddata` the current frame is drawn to a canvas every 500 ms
+     * and sampled for non-black pixels. If picture content is found the
+     * poster is removed; if none is found once the attempts are used up, or
+     * the player reports an error, the fallback poster is set.
+     *
+     * @param {Object} player - Plyr instance
+     * @param {String} videoSrc - value of the media element's `src` attribute
+     */
+    checkVideoLoading(player, videoSrc) {
+        if (!videoSrc || !videoSrc.startsWith('/assets/')) {
+            return;
+        }
+
+        const loadHandler = () => {
+            const checkVideoContent = () => {
+                if (player.media.videoWidth > 0 && player.media.videoHeight > 0) {
+                    const canvas = document.createElement('canvas');
+
+                    canvas.width = player.media.videoWidth;
+                    canvas.height = player.media.videoHeight;
+
+                    const ctx = canvas.getContext('2d');
+
+                    ctx.drawImage(player.media, 0, 0, canvas.width, canvas.height);
+
+                    const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
+                    const data = imageData.data;
+
+                    // look for a non-blank pixel, sampling every `stepSize`-th one (4 bytes per pixel)
+                    const stepSize = 10;
+
+                    for (let i = 0; i < data.length; i += stepSize * 4) {
+                        if (data[i] > 0 || data[i + 1] > 0 || data[i + 2] > 0) {
+                            console.warn('Pixel content detected, poster not needed');
+                            return true;
+                        }
+                    }
+                }
+                return false;
+            };
+
+            const attemptContentCheck = () => {
+                if (checkVideoContent()) {
+                    player.poster = null;
+                    console.warn('Pixel content detected, poster removed');
+                    return true;
+                }
+                return false;
+            };
+
+            let attemptCount = 0;
+            const maxAttempts = 1;
+            const checkInterval = setInterval(() => {
+                const contentFound = attemptContentCheck();
+                const attemptsExhausted = attemptCount >= maxAttempts;
+
+                if (contentFound || attemptsExhausted) {
+                    clearInterval(checkInterval);
+                    player.off('loadeddata', loadHandler);
+
+                    // only fall back when the final attempt found no content either;
+                    // otherwise the poster that was just removed would be put back
+                    if (!contentFound) {
+                        console.warn('Setting fallback poster');
+                        player.poster = fallbackDefault;
+                    }
+                }
+                attemptCount++;
+            }, 500);
+        };
+
+        player.on('error', (error) => {
+            console.error('Error details and location:', {
+                message: error.message,
+                code: error.code,
+                type: error.type,
+                target: error.target,
+                currentTarget: error.currentTarget,
+                originalTarget: error.originalTarget,
+                error: error.error
+            });
+            player.poster = fallbackDefault;
+        });
+
+        player.on('loadeddata', loadHandler);
_getBodyHTML(bodyHtml) {
@@ -141,17 +277,9 @@ class ArticleBodyHtml extends React.PureComponent {
- /**
- * Update Image Embeds to use the Web APIs Assets endpoint
- *
- * @param html - The `body_html` value (could also be the ES Highlight version)
- * @returns {string}
- * @private
- */
_updateImageEmbedSources(html) {
const item = this.props.item;
- // Get the list of Original Rendition IDs for all Image Associations
const imageEmbedOriginalIds = Object
.keys(item.associations || {})
.filter((key) => key.startsWith('editor_'))
@@ -159,35 +287,27 @@ class ArticleBodyHtml extends React.PureComponent {
.filter((value) => value);
if (!imageEmbedOriginalIds.length) {
- // This item has no Image Embeds
- // return the supplied html as-is
return html;
- // Create a DOM node tree from the supplied html
- // We can then efficiently find and update the image sources
const container = document.createElement('div');
let imageSourcesUpdated = false;
container.innerHTML = html;
- .querySelectorAll('img')
+ .querySelectorAll('img,video,audio')
.forEach((imageTag) => {
- // Using the tag's `src` attribute, find the Original Rendition's ID
const originalMediaId = imageEmbedOriginalIds.find((mediaId) => (
!imageTag.src.startsWith('/assets/') &&
if (originalMediaId) {
- // We now have the Original Rendition's ID
- // Use that to update the `src` attribute to use Newshub's Web API
imageSourcesUpdated = true;
imageTag.src = `/assets/${originalMediaId}`;
- // Find all Audio and Video tags and mark them up for the player
container.querySelectorAll('video, audio')
.forEach((vTag) => {
@@ -195,7 +315,6 @@ class ArticleBodyHtml extends React.PureComponent {
vTag.setAttribute('data-plyr-config', '{"controls": ["play-large", "play",' +
'"progress", "volume", "mute", "rewind", "fast-forward", "current-time",' +
'"captions", "restart", "duration"]}');
} else {
vTag.setAttribute('data-plyr-config', '{"controls": ["play-large", "play",' +
'"progress", "volume", "mute", "rewind", "fast-forward", "current-time",' +
@@ -203,35 +322,77 @@ class ArticleBodyHtml extends React.PureComponent {
'"' + vTag.getAttribute('src') + '?item_id=' + item._id + '"' +
imageSourcesUpdated = true;
- // If Image tags were not updated, then return the supplied html as-is
return imageSourcesUpdated ?
container.innerHTML :
- render() {
- const item = this.props.item;
- const html = this.getBodyHTML(
- get(item, 'es_highlight.body_html.length', 0) > 0 ?
- item.es_highlight.body_html[0] :
- item.body_html
- );
+    /**
+     * Document-level click handler: open external links in a new window
+     * without giving it access to `window.opener`.
+     *
+     * The click may land on an element nested inside the anchor (e.g. an
+     * image or emphasis tag), so the nearest enclosing `<a>` is looked up
+     * instead of testing the event target itself.
+     *
+     * @param {Event} event
+     */
+    clickClicked(event) {
+        if (event == null || event.target == null || typeof event.target.closest !== 'function') {
+            return;
+        }
+
+        const link = event.target.closest('a');
+
+        if (link == null || !this.isLinkExternal(link.href)) {
+            return;
+        }
+
+        event.preventDefault();
+        event.stopPropagation();
+        const nextWindow = window.open(link.href, '_blank', 'noopener');
-        if (!html) {
+
+        // with `noopener` browsers normally return null here; the guard only
+        // matters for environments that still hand back a window reference
+        if (nextWindow) {
+            nextWindow.opener = null;
+        }
+    }
+    /**
+     * Tell whether `href` is an http(s) URL on a host other than the current one.
+     *
+     * @param {String} href
+     * @return {Boolean} false as well when `href` cannot be parsed as a URL
+     */
+    isLinkExternal(href) {
+        try {
+            const {host, protocol} = new URL(href);
+            const isWebProtocol = ['http:', 'https:'].includes(protocol);
+
+            return host !== window.location.host && isWebProtocol;
+        } catch (e) {
+            return false;
+        }
+    }
+ // Handler for the document `copy` event: reports the current item through the `reportCopy` prop.
+ copyClicked() {
+ this.props.reportCopy(this.props.item);
+ }
+    /**
+     * Attach the `contextmenu` blocker to every element in the rendered body
+     * that is marked with `data-disable-download="true"`.
+     */
+    addContextMenuEventListeners() {
+        const tree = this.bodyRef.current;
+
+        if (!tree) {
+            return;
+        }
+
+        const protectedElements = tree.querySelectorAll('[data-disable-download="true"]');
+
+        protectedElements.forEach((element) => {
+            element.addEventListener('contextmenu', this.preventContextMenu);
+        });
+    }
+    /**
+     * Detach the `contextmenu` blocker from every element in the rendered
+     * body that is marked with `data-disable-download="true"`.
+     */
+    removeContextMenuEventListeners() {
+        const tree = this.bodyRef.current;
+
+        if (!tree) {
+            return;
+        }
+
+        const protectedElements = tree.querySelectorAll('[data-disable-download="true"]');
+
+        protectedElements.forEach((element) => {
+            element.removeEventListener('contextmenu', this.preventContextMenu);
+        });
+    }
+ // `contextmenu` listener used on `[data-disable-download="true"]` elements: cancels the event's default action.
+ preventContextMenu(event) {
+ event.preventDefault();
+ }
+ render() {
+ if (!this.state.sanitizedHtml) {
return null;
return (
@@ -252,4 +413,4 @@ const mapDispatchToProps = (dispatch) => ({
reportCopy: (item) => dispatch(selectCopy(item))
-export default connect(null, mapDispatchToProps)(ArticleBodyHtml);
\ No newline at end of file
+export default connect(null, mapDispatchToProps)(ArticleBodyHtml);
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 541fde948..a1de46a22 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -3,7 +3,7 @@
diff --git a/features/news_api_auth.feature b/features/news_api_auth.feature
index ccef347d8..4403bf4e9 100644
--- a/features/news_api_auth.feature
+++ b/features/news_api_auth.feature
@@ -22,7 +22,7 @@ Feature: News API Authorization
"company" : "#companies._id#",
"enabled" : true
- """+
+ """
When we save API token
Given "products"
@@ -118,7 +118,7 @@ Feature: News API Authorization
"product_type": "news_api"
- When we set header "X-Forwarded-For" to value ","
+ When we set header "X-Forwarded-For" to value ","
When we get "news/search?q=fish&include_fields=body_html"
Then we get list with 1 items
diff --git a/features/news_api_syndicate.feature b/features/news_api_syndicate.feature
new file mode 100755
index 000000000..14ca27283
--- /dev/null
+++ b/features/news_api_syndicate.feature
@@ -0,0 +1,162 @@
+Feature: News API Syndicate ATOM Search
+ Background: Initial setup
+ Given "companies"
+ """
+ [{"name": "Test Company", "is_enabled" : true}]
+ """
+ Given "news_api_tokens"
+ """
+ [{"company" : "#companies._id#", "enabled" : true}]
+ """
+ When we save API token
+ Scenario: Test atom of syndicate request response restricted by product
+ Given "items"
+ """
+ [{"body_html": "Once upon a time there was a fish who could swim", "headline": "headline 1",
+ "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"},
+ {"body_html": "Once upon a time there was a aardvark that could not swim", "headline": "headline 2",
+ "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}]
+ """
+ Given "products"
+ """
+ [{"name": "A fishy Product",
+ "decsription": "a product for those interested in fish",
+ "companies" : [
+ "#companies._id#"
+ ],
+ "query": "fish",
+ "product_type": "news_api"
+ }]
+ """
+ When we get "syndicate?formatter=atom"
+ Then we get OK response
+ Then we "get" "
" in syndicate xml response
+ Then we "don't get" "
" in syndicate xml response
+ Scenario: test syndicate atom request search q
+ Given "items"
+ """
+ [{"body_html": "
Once upon a time there was a monkey who could swim
", "headline": "headline 1",
+ "byline": "S Smith", "pubstatus": "usable", "service" : [{"name" : "Australian General News", "code" : "a"}],
+ "description_text": "summary",
+ "associations" : {
+ "featuremedia" : {
+ "mimetype" : "image/jpeg",
+ "description_text" : "Deputy Prime Minister Michael McCormack during Question Time",
+ "version" : "1",
+ "byline" : "Mick Tsikas/AAP PHOTOS",
+ "body_text" : "QUESTION TIME ALT",
+ "renditions" : {
+ "16-9" : {
+ "href" : "/assets/5fc5dce16369ab07be3325fa",
+ "height" : 720,
+ "width" : 1280,
+ "media" : "5fc5dce16369ab07be3325fa",
+ "poi" : {
+ "x" : 453,
+ "y" : 335
+ },
+ "mimetype" : "image/jpeg"
+ }
+ }
+ }},
+ "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}]
+ """
+ When we get "syndicate?formatter=atom&q=monkey"
+ Then we get OK response
+ Then we "get" "
" in syndicate xml response
+ Then we "get" "
Mick Tsikas/AAP PHOTOS" in syndicate xml response
+ Scenario: test atom request with embedded image
+ Given "products"
+ """
+ [{"name": "A fishy Product",
+ "decsription": "a product for those interested in fish",
+ "companies" : [
+ "#companies._id#"
+ ],
+ "query": "fish",
+ "product_type": "news_api"
+ },
+ {"name": "A pic product",
+ "decsription": "pic product",
+ "companies" : [
+ "#companies._id#"
+ ],
+ "query": "",
+ "sd_product_id": "1",
+ "product_type": "news_api"
+ }]
+ """
+ Given "items"
+ """
+ [{"body_html": "
Once upon a time there was a fish who could swim
+ "headline": "headline 1",
+ "byline": "S Smith", "pubstatus": "usable", "service" : [{"name" : "Australian General News", "code" : "a"}],
+ "description_text": "summary",
+ "associations" : {
+ "editor_19" : {
+ "mimetype" : "image/jpeg",
+ "description_text" : "Deputy Prime Minister Michael McCormack during Question Time",
+ "version" : "1",
+ "byline" : "Mick Tsikas/AAP PHOTOS",
+ "body_text" : "QUESTION TIME ALT",
+ "products": [{"code": "1"}],
+ "renditions" : {
+ "16-9" : {
+ "href" : "/assets/5fc5dce16369ab07be3325fa",
+ "height" : 720,
+ "width" : 1280,
+ "media" : "5fc5dce16369ab07be3325fa",
+ "poi" : {
+ "x" : 453,
+ "y" : 335
+ },
+ "mimetype" : "image/jpeg"
+ }
+ }
+ }},
+ "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}]
+ """
+ When we get "syndicate?formatter=atom"
+ Then we get OK response
+ Then we "get" "
" in syndicate xml response
+ Then we "get" "5fc5dce16369ab07be3325fa" in atom xml response
+ Then we "get" "src="http://" in atom xml response
+ Scenario: Atom request response restricted by featured image product
+ Given "items"
+ """
+ [{"body_html": "Once upon a time there was a fish who could swim", "headline": "headline 1",
+ "firstpublished": "#DATE-1#", "versioncreated": "#DATE#",
+ "associations": {"featuremedia": {"products": [{"code": "1234"}], "renditions": {"original": {}} }}},
+ {"body_html": "Once upon a time there was a aardvark that could not swim", "headline": "headline 2",
+ "firstpublished": "#DATE-1#", "versioncreated": "#DATE#",
+ "associations": {"featuremedia": {"products": [{"code": "4321"}], "renditions": {"original": {}} }}}]
+ """
+ Given "products"
+ """
+ [{"name": "A fishy Product",
+ "decsription": "a product for those interested in fish",
+ "companies" : [
+ "#companies._id#"
+ ],
+ "query": "Once upon a time",
+ "product_type": "news_api"
+ },
+ {"name": "A fishy superdesk product",
+ "description": "a superdesk product restricting images in the atom feed",
+ "companies" : [
+ "#companies._id#"
+ ],
+ "sd_product_id": "1234",
+ "product_type": "news_api"
+ }
+ ]
+ """
+ When we get "syndicate?formatter=atom"
+ Then we get OK response
+ Then we "get" "
" in syndicate xml response
+ Then we "don't get" "
" in syndicate xml response
diff --git a/features/steps/steps.py b/features/steps/steps.py
index 9bd4f6aeb..60342b2a8 100644
--- a/features/steps/steps.py
+++ b/features/steps/steps.py
@@ -17,6 +17,7 @@
from wooper.general import (
+import logging
@when('we save API token')
@@ -74,3 +75,21 @@ def we_get_text_in_atom_xml_response(context, get, text):
assert (text in get_body(context.response))
assert (text not in get_body(context.response))
+@then('we "{get}" "{text}" in syndicate xml response')
+def we_get_text_in_syndicate_xml_response(context, get, text):
+    """Check that the response is an Atom feed and that `text` is present in it
+    (when `get` is "get") or absent from it (any other value of `get`)."""
+    url_prefix = context.app.config['URL_PREFIX']
+    with context.app.test_request_context(url_prefix):
+        response_body = get_body(context.response)
+        logging.info("Response body: %s", response_body)
+        assert (isinstance(get_body(context.response), str))
+        try:
+            root = lxml.etree.fromstring(response_body.encode('utf-8'))
+        except lxml.etree.XMLSyntaxError as e:
+            logging.error("XML parsing error: %s", e)
+            raise AssertionError("Response is not valid XML")
+        else:
+            assert '{http://www.w3.org/2005/Atom}feed' == root.tag
+            found = text in response_body
+            if get == 'get':
+                assert (found)
+            else:
+                assert (not found)
diff --git a/karma.conf.js b/karma.conf.js
index d17325f2e..d8e96e201 100644
--- a/karma.conf.js
+++ b/karma.conf.js
@@ -13,6 +13,7 @@ module.exports = function(config) {
'assets/tests.js': ['webpack', 'sourcemap'],
webpack: {
module: webpackConfig.module,
resolve: webpackConfig.resolve,
diff --git a/newsroom/companies/companies.py b/newsroom/companies/companies.py
index 270744a79..71f48ec50 100644
--- a/newsroom/companies/companies.py
+++ b/newsroom/companies/companies.py
@@ -47,13 +47,66 @@ class CompaniesResource(newsroom.Resource):
'archive_access': {
'type': 'boolean',
+ 'company_type': {
+ 'type': 'string',
+ 'nullable': True,
+ },
'events_only': {
'type': 'boolean',
'default': False,
- 'company_type': {
- 'type': 'string',
- 'nullable': True,
+ 'embedded': {
+ 'type': 'dict',
+ 'schema': {
+ 'video_display': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'audio_display': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'social_media_display': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'images_display': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'sdpermit_display': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'all_display': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'social_media_download': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'video_download': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'audio_download': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'images_download': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'sdpermit_download': {
+ 'type': 'boolean',
+ 'default': False,
+ },
+ 'all_download': {
+ 'type': 'boolean',
+ 'default': False,
+ }
+ }
'account_manager': {
'type': 'string'
diff --git a/newsroom/companies/views.py b/newsroom/companies/views.py
index b80a78e4a..2144b7b7e 100644
--- a/newsroom/companies/views.py
+++ b/newsroom/companies/views.py
@@ -3,7 +3,7 @@
import flask
from bson import ObjectId
-from flask import jsonify, current_app as app
+from flask import current_app as app
from flask_babel import gettext
from superdesk import get_resource_service
from werkzeug.exceptions import NotFound
@@ -13,6 +13,8 @@
from newsroom.utils import query_resource, find_one, get_entity_or_404, get_json_or_400, set_original_creator, \
import ipaddress
+from flask import request, jsonify, current_app, session
+import secrets
def get_company_types_options(company_types):
@@ -154,14 +156,45 @@ def update_products(updates, company_id):
def update_company(data, _id):
updates = {k: v for k, v in data.items() if k in ('sections', 'archive_access', 'events_only')}
+ # Whitelist of the per-company embed display/download flags accepted from the request payload.
+ embedded_fields = [
+ 'video_display', 'audio_display', 'social_media_display', 'images_display', 'sdpermit_display', 'all_display',
+ 'social_media_download', 'video_download', 'audio_download', 'images_download', 'sdpermit_download',
+ 'all_download'
+ ]
+ if 'embedded' in data:
+ # keep only the known flags; any other key under `embedded` is dropped
+ embedded_updates = {k: v for k, v in data['embedded'].items() if k in embedded_fields}
+ # an empty result leaves `embedded` out of the patch altogether
+ # NOTE(review): presumably the patch replaces the whole `embedded` dict, so flags
+ # missing from the payload may be lost - confirm against the service's patch semantics
+ if embedded_updates:
+ updates['embedded'] = embedded_updates
get_resource_service('companies').patch(_id, updates=updates)
@blueprint.route('/companies/<_id>/permissions', methods=['POST'])
def save_company_permissions(_id):
+    # The token issued by `get_csrf_token` is single-use: it is removed from the
+    # session here whether or not validation succeeds. A default is supplied so a
+    # request made without a previously issued token is rejected with 403 below
+    # instead of failing with a KeyError.
+    csrf_token = request.headers.get('X-CSRF-Token')
+    expected_csrf_token = session.pop('csrf_token', None)
    orig = get_entity_or_404(_id, 'companies')
    data = get_json_or_400()
-    update_products(data['products'], _id)
-    update_company(data, orig['_id'])
-    return jsonify(), 200
+    # Compare in constant time; encoding to bytes keeps compare_digest from
+    # raising on a header value that contains non-ASCII characters.
+    if not csrf_token or not expected_csrf_token or not secrets.compare_digest(
+            csrf_token.encode('utf-8'), expected_csrf_token.encode('utf-8')):
+        current_app.logger.error("Permisson CSRF validation failed:")
+        return jsonify({"error": "Permisson CSRF token validation failed"}), 403
+    try:
+        update_products(data['products'], _id)
+        update_company(data, orig['_id'])
+    except Exception as e:
+        current_app.logger.error(f"Error updating company permissions: {str(e)}")
+        return jsonify({"error": "An error occurred while updating permissions"}), 500
+    return jsonify({"message": "Permissions updated successfully"}), 200
+@blueprint.route('/companies/get-csrf-token', methods=['GET'])
+def get_csrf_token():
+    """Generate a random CSRF token, store it in the session and return it as JSON."""
+    session['csrf_token'] = token = secrets.token_hex(32)
+    return jsonify({'csrf_token': token})
diff --git a/newsroom/news_api/news/atom/atom.py b/newsroom/news_api/news/atom/atom.py
deleted file mode 100644
index ebc49dad6..000000000
--- a/newsroom/news_api/news/atom/atom.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import superdesk
-import flask
-from eve.methods.get import get_internal
-from lxml import etree
-from lxml.etree import SubElement
-from superdesk.utc import utcnow
-from flask import current_app as app, g
-import datetime
-import logging
-from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls
-from newsroom.wire.formatters.utils import remove_unpermissioned_embeds
-blueprint = superdesk.Blueprint('atom', __name__)
-logger = logging.getLogger(__name__)
-def init_app(app):
- superdesk.blueprint(blueprint, app)
-@blueprint.route('/atom', methods=['GET'])
', methods=['GET'])
-def get_atom(token=None):
- def _format_date(date):
- iso8601 = date.isoformat()
- if date.tzinfo:
- return iso8601
- return iso8601 + 'Z'
- def _format_update_date(date):
- DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
- return date.strftime(DATETIME_FORMAT) + 'Z'
- auth = app.auth
- if not auth.authorized([], None, flask.request.method):
- if token:
- if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'):
- return auth.authenticate()
- else:
- return auth.authenticate()
- XML_ROOT = ''
- _message_nsmap = {None: 'http://www.w3.org/2005/Atom', 'dcterms': 'http://purl.org/dc/terms/',
- 'media': 'http://search.yahoo.com/mrss/',
- 'mi': 'http://schemas.ingestion.microsoft.com/common/'}
-# feed = etree.Element('feed', attrib={'lang': 'en-us'}, nsmap=_message_nsmap)
- feed = etree.Element('feed', nsmap=_message_nsmap)
- SubElement(feed, 'title').text = etree.CDATA('{} Atom Feed'.format(app.config['SITE_NAME']))
- SubElement(feed, 'updated').text = _format_update_date(utcnow())
- SubElement(SubElement(feed, 'author'), 'name').text = app.config['SITE_NAME']
- SubElement(feed, 'id').text = flask.url_for('atom.get_atom', _external=True)
- SubElement(feed, 'link', attrib={'href': flask.url_for('atom.get_atom', _external=True), 'rel': 'self'})
- response = get_internal('news/search')
-# TODO allow products to be passed as arguments
-# req = ParsedRequest()
-# req.args = ImmutableMultiDict([('include_fields', 'associations')])
-# response = superdesk.get_resource_service('news/search').get(req=req, lookup=None)
- # for item in response.docs:
- for item in response[0].get('_items'):
- try:
- complete_item = superdesk.get_resource_service('items').find_one(req=None, _id=item.get('_id'))
- # If featuremedia is not allowed for the company don't add the item
- if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'):
- if not check_featuremedia_association_permission(complete_item):
- continue
- remove_unpermissioned_embeds(complete_item, g.user, 'news_api')
- entry = SubElement(feed, 'entry')
- # If the item has any parents we use the id of the first, this should be constant throught the update
- # history
- if complete_item.get('ancestors') and len(complete_item.get('ancestors')):
- SubElement(entry, 'id').text = complete_item.get('ancestors')[0]
- else:
- SubElement(entry, 'id').text = complete_item.get('_id')
- SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline'))
- SubElement(entry, 'published').text = _format_date(complete_item.get('firstpublished'))
- SubElement(entry, 'updated').text = _format_update_date(complete_item.get('versioncreated'))
- if token:
- SubElement(entry, 'link', attrib={'rel': 'self', 'href': flask.url_for('news/item.get_item',
- item_id=item.get('_id'),
- format='TextFormatter',
- token=token,
- _external=True)})
- else:
- SubElement(entry, 'link', attrib={'rel': 'self', 'href': flask.url_for('news/item.get_item',
- item_id=item.get('_id'),
- format='TextFormatter',
- _external=True)})
- if complete_item.get('byline'):
- name = complete_item.get('byline')
- if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == complete_item.get(
- 'source', '').lower():
- name = name + " - " + complete_item.get('source')
- SubElement(SubElement(entry, 'author'), 'name').text = name
- else:
- SubElement(SubElement(entry, 'author'), 'name').text = complete_item.get('source') if complete_item.get(
- 'source') else app.config['COPYRIGHT_HOLDER']
- SubElement(entry, 'rights').text = complete_item.get('source', '')
- if complete_item.get('pubstatus') == 'usable':
- SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
- 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()),
- _format_date(utcnow() + datetime.timedelta(days=30)))
- else:
- # in effect a kill set the end date into the past
- SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
- 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()),
- _format_date(utcnow() - datetime.timedelta(days=30)))
- categories = [{'name': s.get('name')} for s in complete_item.get('service', [])]
- for category in categories:
- SubElement(entry, 'category', attrib={'term': category.get('name')})
- SubElement(entry, 'summary').text = etree.CDATA(complete_item.get('description_text', ''))
- update_embed_urls(complete_item, token)
- SubElement(entry, 'content', attrib={'type': 'html'}).text = etree.CDATA(complete_item.get('body_html', ''))
- if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'):
- image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions').get(
- "16-9")
- metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {})
- url = flask.url_for('assets.get_item', _external=True, asset_id=image.get('media'),
- token=token) if token else flask.url_for(
- 'assets.get_item', _external=True, asset_id=image.get('media'))
- media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'),
- attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'})
- SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get('byline')
- SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get(
- 'description_text')
- SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get('body_text')
- focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(image.get('poi').get('x'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(image.get('poi').get('x'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(image.get('poi').get('y'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(image.get('poi').get('y'))
- except Exception as ex:
- logger.exception('processing {} - {}'.format(item.get('_id'), ex))
- return flask.Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'),
- mimetype='application/atom+xml')
diff --git a/newsroom/news_api/news/rss/rss.py b/newsroom/news_api/news/rss/rss.py
deleted file mode 100644
index af6a733f6..000000000
--- a/newsroom/news_api/news/rss/rss.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import superdesk
-import flask
-from eve.methods.get import get_internal
-from lxml import etree
-from lxml.etree import SubElement
-from superdesk.utc import utcnow
-from flask import current_app as app, g
-from email import utils
-import datetime
-import logging
-from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls
-from newsroom.wire.formatters.utils import remove_unpermissioned_embeds
-blueprint = superdesk.Blueprint('rss', __name__)
-logger = logging.getLogger(__name__)
-def init_app(app):
- superdesk.blueprint(blueprint, app)
-@blueprint.route('/rss', methods=['GET'])
-@blueprint.route('/rss/', methods=['GET'])
-def get_rss(token=None):
- def _format_date(date):
- iso8601 = date.isoformat()
- if date.tzinfo:
- return iso8601
- return iso8601 + 'Z'
- def _format_date_2(date):
- DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
- return date.strftime(DATETIME_FORMAT) + 'Z'
- def _format_date_3(date):
- return utils.format_datetime(date)
- auth = app.auth
- if not auth.authorized([], None, flask.request.method):
- if token:
- if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'):
- return auth.authenticate()
- else:
- return auth.authenticate()
- XML_ROOT = ''
- _message_nsmap = {'dcterms': 'http://purl.org/dc/terms/', 'media': 'http://search.yahoo.com/mrss/',
- 'dc': 'http://purl.org/dc/elements/1.1/', 'mi': 'http://schemas.ingestion.microsoft.com/common/',
- 'content': 'http://purl.org/rss/1.0/modules/content/'}
-# feed = etree.Element('feed', attrib={'lang': 'en-us'}, nsmap=_message_nsmap)
- feed = etree.Element('rss', attrib={'version': '2.0'}, nsmap=_message_nsmap)
- channel = SubElement(feed, 'channel')
- SubElement(channel, 'title').text = '{} RSS Feed'.format(app.config['SITE_NAME'])
- SubElement(channel, 'description').text = '{} RSS Feed'.format(app.config['SITE_NAME'])
- SubElement(channel, 'link').text = flask.url_for('rss.get_rss', _external=True)
- response = get_internal('news/search')
-# req = ParsedRequest()
-# req.args = {'include_fields': 'abstract'}
-# response = superdesk.get_resource_service('news/search').get(req=req, lookup=None)
- for item in response[0].get('_items'):
- try:
- complete_item = superdesk.get_resource_service('items').find_one(req=None, _id=item.get('_id'))
- # If featuremedia is not allowed for the company don't add the item
- if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'):
- if not check_featuremedia_association_permission(complete_item):
- continue
- remove_unpermissioned_embeds(complete_item, g.user, 'news_api')
- entry = SubElement(channel, 'item')
- # If the item has any parents we use the id of the first, this should be constant throught the update
- # history
- if complete_item.get('ancestors') and len(complete_item.get('ancestors')):
- SubElement(entry, 'guid').text = complete_item.get('ancestors')[0]
- else:
- SubElement(entry, 'guid').text = complete_item.get('_id')
- SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline'))
- SubElement(entry, 'pubDate').text = _format_date_3(complete_item.get('firstpublished'))
- SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'modified')).text = _format_date_2(
- complete_item.get('versioncreated'))
- if token:
- SubElement(entry, 'link').text = flask.url_for('news/item.get_item',
- item_id=item.get('_id'),
- format='TextFormatter',
- token=token,
- _external=True)
- else:
- SubElement(entry, 'link').text = flask.url_for('news/item.get_item',
- item_id=item.get('_id'),
- format='TextFormatter',
- _external=True)
- if complete_item.get('byline'):
- name = complete_item.get('byline')
- if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == complete_item.get(
- 'source', '').lower():
- name = name + " - " + complete_item.get('source')
- SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = name
- else:
- SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = \
- complete_item.get('source') if complete_item.get('source') else app.config['COPYRIGHT_HOLDER']
- SubElement(entry, 'source', attrib={'url': flask.url_for('rss.get_rss', _external=True)}).text = \
- complete_item.get('source', '')
- if complete_item.get('pubstatus') == 'usable':
- SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
- 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()),
- _format_date(utcnow() + datetime.timedelta(days=30)))
- else:
- # in effect a kill set the end date into the past
- SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
- 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()),
- _format_date(utcnow() - datetime.timedelta(days=30)))
- categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] \
- + [{'name': s.get('name')} for s in complete_item.get('subject', [])] \
- + [{'name': s.get('name')} for s in complete_item.get('place', [])] \
- + [{'name': k} for k in complete_item.get('keywords', [])]
- for category in categories:
- SubElement(entry, 'category').text = category.get('name')
- SubElement(entry, 'description').text = etree.CDATA(complete_item.get('description_text', ''))
- update_embed_urls(complete_item, token)
- SubElement(entry, etree.QName(_message_nsmap.get('content'), 'encoded')).text = etree.CDATA(
- complete_item.get('body_html', ''))
- if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'):
- image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions').get(
- "16-9")
- metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {})
- url = flask.url_for('assets.get_item', _external=True, asset_id=image.get('media'),
- token=token) if token else flask.url_for(
- 'assets.get_item', _external=True, asset_id=image.get('media'))
- media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'),
- attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'})
- SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get('byline')
- SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get(
- 'description_text')
- SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get('body_text')
- focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(image.get('poi').get('x'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(image.get('poi').get('x'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(image.get('poi').get('y'))
- SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(image.get('poi').get('y'))
- except Exception as ex:
- logger.exception('processing {} - {}'.format(item.get('_id'), ex))
- return flask.Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'),
- mimetype='application/rss+xml')
diff --git a/newsroom/news_api/news/syndicate/__init__.py b/newsroom/news_api/news/syndicate/__init__.py
new file mode 100644
index 000000000..4cf192da9
--- /dev/null
+++ b/newsroom/news_api/news/syndicate/__init__.py
@@ -0,0 +1,44 @@
+import superdesk
+import logging
+from flask import request, make_response, jsonify
+from eve.methods.get import get_internal
+from .error_handlers import process_error_response
+from .auth import authenticate
+from .resource import NewsAPISyndicateResource
+from .service import NewsAPISyndicateService
+from werkzeug.routing import BaseConverter
+# Blueprint that carries the syndicate feed route defined in this module.
+syndicate_blueprint = superdesk.Blueprint('syndicate', __name__)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+class RegExConverter(BaseConverter):
+    """URL converter whose matching pattern is taken from the route rule.
+
+    ``init_app`` installs it on the application's URL map under the name ``regex``.
+    When no pattern is given, any run of characters without a slash is accepted.
+    """
+
+    def __init__(self, map, regex='[^/]+'):
+        BaseConverter.__init__(self, map)
+        self.regex = regex
+# NOTE(review): the URL patterns below were reconstructed from the handler signature
+# (``syndicate_type``, ``token``), the registered ``regex`` converter and the keys the
+# feed generators accept - confirm the exact set of allowed path values.
+@syndicate_blueprint.route('/<regex("atom|rss|syndicate"):syndicate_type>', methods=['GET'])
+@syndicate_blueprint.route('/<regex("atom|rss|syndicate"):syndicate_type>/<path:token>', methods=['GET'])
+@authenticate
+def get_syndicate_feed(syndicate_type, token=None):
+    """Serve the syndicate search result as JSON, Atom or RSS.
+
+    :param syndicate_type: path segment selecting the feed type when no ``formatter``
+        query parameter is supplied.
+    :param token: optional token taken from the URL; it is consumed by the
+        ``authenticate`` decorator.
+    :return: the response produced by the selected handler, or a JSON error
+        description when the handler raises ``ValueError``.
+    """
+    # Imported locally: the handler tables are not imported at module level, and the
+    # handlers module itself imports siblings of this package.
+    from .syndicate_handlers import FORMAT_HANDLERS, FEED_GENERATORS
+
+    response = get_internal('news/syndicate')
+    format_param = request.args.get('formatter')
+    if format_param:
+        # An explicit ``formatter`` query parameter wins over the path segment.
+        format_param = format_param.upper().strip()
+        try:
+            return FORMAT_HANDLERS[format_param]['handler'](response[0], format_param)
+        except ValueError as e:
+            error_message = f"An error occurred in converting response to {format_param}: {e}"
+            error_response = make_response(jsonify({'error': error_message}), 400)
+            return process_error_response(error_response)
+    # ``FORMAT_HANDLERS_INIT`` is not defined anywhere; the table keyed by lower-case
+    # feed type is ``FEED_GENERATORS``.
+    return FEED_GENERATORS[syndicate_type.lower()](response[0])
+def init_app(app):
+    """Register the syndicate resource, the ``regex`` URL converter and the blueprint on ``app``."""
+    superdesk.register_resource(
+        'news/syndicate',
+        NewsAPISyndicateResource,
+        NewsAPISyndicateService,
+        _app=app,
+    )
+    # The converter is installed before the blueprint is registered.
+    app.url_map.converters.update(regex=RegExConverter)
+    superdesk.blueprint(syndicate_blueprint, app)
diff --git a/newsroom/news_api/news/syndicate/auth.py b/newsroom/news_api/news/syndicate/auth.py
new file mode 100644
index 000000000..ed0d2074d
--- /dev/null
+++ b/newsroom/news_api/news/syndicate/auth.py
@@ -0,0 +1,18 @@
+from functools import wraps
+from flask import current_app as app
+import flask
+def authenticate(func):
+    """Decorator that lets a view run only for an authorized request or a valid token.
+
+    The wrapped view is called when ``app.auth.authorized`` accepts the request, or when
+    a ``token`` keyword argument is present and ``app.auth.check_auth`` accepts it.
+    In every other case the result of ``app.auth.authenticate()`` is returned instead.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        auth = app.auth
+        http_method = flask.request.method
+        if auth.authorized([], None, http_method):
+            return func(*args, **kwargs)
+        token = kwargs.get('token')
+        # No token, or a token the auth backend rejects: hand over to the auth challenge.
+        if not token or not auth.check_auth(token, allowed_roles=None, resource=None, method=http_method):
+            return auth.authenticate()
+        return func(*args, **kwargs)
+    return wrapper
diff --git a/newsroom/news_api/news/syndicate/error_handlers.py b/newsroom/news_api/news/syndicate/error_handlers.py
new file mode 100644
index 000000000..e53efe803
--- /dev/null
+++ b/newsroom/news_api/news/syndicate/error_handlers.py
@@ -0,0 +1,54 @@
+from typing import Union, Mapping, Dict
+from flask import request, make_response, jsonify
+def handle_unsupported_format(data, formatter=None):
+    """Build the 400 error response for a formatter that is not supported.
+
+    ``data`` is accepted for signature compatibility with the other handlers and is not used.
+    """
+    error_message = f"Unsupported formatter: {formatter if formatter is not None else ''} "
+    payload = jsonify({'error': error_message})
+    return process_error_response(make_response(payload, 400))
+def process_error_response(response):
+    """Wrap an error response in a JSON payload that also explains how to call the endpoint.
+
+    :param response: Flask response describing the error. Its decoded body becomes the
+        error message; its status code is reported in the payload and is kept as the
+        HTTP status of the returned response.
+    :return: JSON response with an ``error`` section (code, message) and a ``usage``
+        section (endpoint, method, description, parameters, examples).
+    """
+    error_message: Union[bytes, str] = response.data.decode(
+        'utf-8') if response.data else 'error message empty,contact admin for log information'
+
+    def syndicate_examples() -> Mapping[str, str]:
+        # The endpoint reads the ``formatter`` query parameter, so the sample URLs use that name.
+        examples = {
+            'json': (
+                f"{request.url_root}syndicate?formatter=json&q=trump&start_date=2020-04-01"
+                f"&timezone=Australia/Sydney"
+            ),
+            'atom': (
+                f"{request.url_root}syndicate?formatter=atom&start_date=now-30d&end_date=now"
+                f"&timezone=Australia/Sydney&include_fields=headline,byline,slugline,description_html,"
+                f"located,keywords,source,subject,place,wordcount,charcount,body_html,readtime,profile,"
+                f"service,genre,associations"
+            ),
+            'rss': (
+                f"{request.url_root}syndicate?formatter=rss&exclude_fields=version,versioncreated,"
+                f"firstcreated"
+            )
+        }
+        return examples
+
+    def syndicate_parameters() -> Dict[str, str]:
+        parameters = {
+            'formatter': "Specifies the desired format of the response. Accepts 'json', 'atom', or 'rss'.",
+            # ... (other parameters) ...
+        }
+        return parameters
+
+    error_payload: Dict[str, Dict[str, Union[int, str, Dict[str, str], Mapping[str, str]]]] = {
+        "error": {
+            "code": response.status_code,
+            "message": error_message,
+        },
+        "usage": {
+            "endpoint": str(request.url),
+            "method": request.method,
+            "description": "This API endpoint allows formats (JSON, ATOM, RSS).",
+            # These two values were previously swapped.
+            "parameters": syndicate_parameters(),
+            "examples": syndicate_examples(),
+        },
+    }
+    # ``jsonify`` alone answers with 200; carry the original error status over.
+    payload_response = jsonify(error_payload)
+    payload_response.status_code = response.status_code
+    return payload_response
diff --git a/newsroom/news_api/news/syndicate/resource.py b/newsroom/news_api/news/syndicate/resource.py
new file mode 100644
index 000000000..b37e32d23
--- /dev/null
+++ b/newsroom/news_api/news/syndicate/resource.py
@@ -0,0 +1,12 @@
+from newsroom import Resource
+class NewsAPISyndicateResource(Resource):
+    """Resource definition for the syndicate feed, backed by ``items`` through the elastic search backend."""
+
+    resource_title = 'News Syndicate'
+    datasource = dict(search_backend='elastic', source='items')
+    # No HTTP methods are declared for the resource or for its items.
+    resource_methods = []
+    item_methods = []
diff --git a/newsroom/news_api/news/syndicate/service.py b/newsroom/news_api/news/syndicate/service.py
new file mode 100644
index 000000000..1f4934ff2
--- /dev/null
+++ b/newsroom/news_api/news/syndicate/service.py
@@ -0,0 +1,335 @@
+from content_api.errors import BadParameterValueError
+from newsroom.news_api.news.search_service import NewsAPINewsService
+from superdesk import get_resource_service
+from lxml import etree
+from lxml.etree import SubElement
+from superdesk.utc import utcnow
+from flask import current_app as app, g, Response, url_for
+import logging
+from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls
+from newsroom.wire.formatters.utils import remove_unpermissioned_embeds
+from datetime import timedelta
+from email import utils
+class NewsAPISyndicateService(NewsAPINewsService):
+    # Service class registered for the ``news/syndicate`` resource; it extends the
+    # news search service.
+
+    # Names listed once each; the resulting set is the same as before.
+    allowed_params = {
+        'start_date', 'end_date', 'timezone',
+        'include_fields', 'exclude_fields', 'exclude_ids',
+        'max_results', 'page_size', 'page', 'sort',
+        'version', 'where', 'item_source', 'products',
+        'q', 'default_operator', 'filter',
+        'service', 'subject', 'genre', 'urgency', 'priority', 'type',
+        'formatter',
+    }
+
+    default_sort = [{'versioncreated': 'asc'}]
+
+    allowed_exclude_fields = {'version', 'firstcreated', 'headline', 'byline', 'slugline'}
+ # Adds the HATEOAS links to the fetched document first, then runs the parent class hook.
+ def on_fetched(self, doc):
+ self._enhance_hateoas(doc)
+ super().on_fetched(doc)
+    def _enhance_hateoas(self, doc):
+        """Add a ``parent`` link to ``doc['_links']`` and a ``self`` link to every item.
+
+        :param doc: fetched document; it is modified in place.
+        """
+        doc.setdefault('_links', {})
+        # A trailing comma after this literal used to store a 1-tuple wrapping the dict
+        # instead of the link object itself.
+        doc['_links']['parent'] = {
+            'title': 'Home',
+            'href': '/'
+        }
+        self._hateoas_set_item_links(doc)
+    def _hateoas_set_item_links(self, doc):
+        """Give every entry of ``doc['_items']`` a ``self`` link and drop its ``_updated``,
+        ``_created`` and ``_etag`` fields. Entries are modified in place.
+        """
+        for entry in doc.get('_items') or []:
+            entry_id = str(entry['_id'])
+            links = entry.setdefault('_links', {})
+            links['self'] = {
+                'href': 'news/item/{}'.format(entry_id),
+                'title': 'News Item'
+            }
+            for field in ('_updated', '_created', '_etag'):
+                entry.pop(field, None)
+    def prefill_search_query(self, search, req=None, lookup=None):
+        """Run the parent prefill, then normalise ``exclude_ids`` and the result size.
+
+        ``exclude_ids`` is split on commas into a list. ``max_results`` is converted to an
+        integer (200 when missing or empty) and copied to ``size``.
+
+        :raises BadParameterValueError: when ``max_results`` is not a number.
+        """
+        super().prefill_search_query(search, req, lookup)
+        args = search.args
+
+        excluded = args.get('exclude_ids')
+        if excluded:
+            args['exclude_ids'] = excluded.split(',')
+
+        requested = args.get('max_results') or 200
+        try:
+            limit = int(requested)
+        except ValueError:
+            raise BadParameterValueError('Max Results must be a number')
+        args['max_results'] = limit
+        args['size'] = limit
+    @staticmethod
+    def _format_date(date):
+        """Return ``date.isoformat()``, with ``Z`` appended when ``date`` carries no tzinfo."""
+        suffix = '' if date.tzinfo else 'Z'
+        return date.isoformat() + suffix
+    @staticmethod
+    def _format_update_date(date):
+        """Format ``date`` down to the second and append ``Z``.
+
+        No timezone conversion is performed before the suffix is added.
+        """
+        stamp = date.strftime("%Y-%m-%dT%H:%M:%S")
+        return stamp + 'Z'
+ # Formats ``date`` with ``email.utils.format_datetime``; the RSS generator uses the
+ # result for the ``pubDate`` element.
+ @staticmethod
+ def _format_date_publish(date):
+ return utils.format_datetime(date)
+    @staticmethod
+    def generate_atom_feed(response, token=None):
+        """Build an Atom feed from the items in ``response``.
+
+        :param response: mapping whose ``_items`` list holds the search hits; the ``_id`` of
+            each hit is used to load the complete item from the ``items`` resource service.
+        :param token: optional token added to the item and asset URLs.
+        :return: Flask ``Response`` with mimetype ``application/atom+xml``.
+
+        An item whose feature media fails the permission check is left out. An exception
+        raised while processing one item is passed to ``handle_exception`` and processing
+        continues with the next item.
+        """
+        XML_ROOT = ''
+        _message_nsmap = {None: 'http://www.w3.org/2005/Atom', 'dcterms': 'http://purl.org/dc/terms/',
+                          'media': 'http://search.yahoo.com/mrss/',
+                          'mi': 'http://schemas.ingestion.microsoft.com/common/'}
+
+        feed = etree.Element('feed', nsmap=_message_nsmap)
+        SubElement(feed, 'title').text = etree.CDATA('{} Atom Feed'.format(app.config['SITE_NAME']))
+        SubElement(feed, 'updated').text = __class__._format_update_date(utcnow())
+        SubElement(SubElement(feed, 'author'), 'name').text = app.config['SITE_NAME']
+        feed_url = url_for('syndicate.get_syndicate_feed',
+                           syndicate_type='syndicate',
+                           _external=True,
+                           formatter='atom')
+        SubElement(feed, 'id').text = feed_url
+        SubElement(feed, 'link',
+                   attrib={'href': feed_url, 'rel': 'self'})
+
+        item_resource = get_resource_service('items')
+        for item in response['_items']:
+            # Reset for every item so a rendition found for an earlier item can never be
+            # attached to this one.
+            image = None
+            try:
+                complete_item = item_resource.find_one(req=None, _id=item.get('_id'))
+
+                # If featuremedia is not allowed for the company don't add the item
+                if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'):
+                    if not check_featuremedia_association_permission(complete_item):
+                        continue
+                remove_unpermissioned_embeds(complete_item, g.user, 'news_api')
+
+                entry = SubElement(feed, 'entry')
+
+                # If the item has any parents we use the id of the first, this should be
+                # constant throughout the update history
+                ancestors = complete_item.get('ancestors')
+                if ancestors:
+                    SubElement(entry, 'id').text = ancestors[0]
+                else:
+                    SubElement(entry, 'id').text = complete_item.get('_id')
+
+                SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline'))
+                SubElement(entry, 'published').text = __class__._format_date(complete_item.get('firstpublished'))
+                SubElement(entry, 'updated').text = __class__._format_update_date(
+                    complete_item.get('versioncreated'))
+
+                if token:
+                    SubElement(entry, 'link', attrib={'rel': 'self', 'href': url_for('news/item.get_item',
+                                                                                     item_id=item.get('_id'),
+                                                                                     format='TextFormatter',
+                                                                                     token=token,
+                                                                                     _external=True)})
+                else:
+                    SubElement(entry, 'link', attrib={'rel': 'self', 'href': url_for('news/item.get_item',
+                                                                                     item_id=item.get('_id'),
+                                                                                     format='TextFormatter',
+                                                                                     _external=True)})
+
+                # Author: the byline, followed by the source when it differs from the copyright
+                # holder; without a byline, the source or else the copyright holder.
+                if complete_item.get('byline'):
+                    name = complete_item.get('byline')
+                    if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == \
+                            complete_item.get('source', '').lower():
+                        name = name + " - " + complete_item.get('source')
+                    SubElement(SubElement(entry, 'author'), 'name').text = name
+                else:
+                    SubElement(SubElement(entry, 'author'), 'name').text = \
+                        complete_item.get('source') if complete_item.get('source') else app.config['COPYRIGHT_HOLDER']
+
+                SubElement(entry, 'rights').text = complete_item.get('source', '')
+
+                if complete_item.get('pubstatus') == 'usable':
+                    SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
+                        'start={}; end={}; scheme=W3C-DTF'.format(
+                            __class__._format_date(utcnow()),
+                            __class__._format_date(utcnow() + timedelta(days=30)))
+                else:
+                    # Any other status gets an end date in the past.
+                    SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
+                        'start={}; end={}; scheme=W3C-DTF'.format(
+                            __class__._format_date(utcnow()),
+                            __class__._format_date(utcnow() - timedelta(days=30)))
+
+                for service in complete_item.get('service', []):
+                    SubElement(entry, 'category', attrib={'term': service.get('name')})
+
+                SubElement(entry, 'summary').text = etree.CDATA(complete_item.get('description_text', ''))
+
+                update_embed_urls(complete_item, token)
+
+                SubElement(entry, 'content', attrib={'type': 'html'}).text = etree.CDATA(
+                    complete_item.get('body_html', ''))
+
+                featuremedia = (complete_item.get('associations') or {}).get('featuremedia') or {}
+                if featuremedia.get('renditions'):
+                    image = featuremedia.get('renditions').get("16-9")
+                if image:
+                    metadata = featuremedia
+                    url = url_for('assets.get_item', _external=True, asset_id=image.get('media'),
+                                  token=token) if token else url_for(
+                        'assets.get_item', _external=True, asset_id=image.get('media'))
+
+                    media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'),
+                                       attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'})
+
+                    SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get(
+                        'byline')
+                    SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get(
+                        'description_text')
+                    SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get(
+                        'body_text')
+                    if image.get('poi'):
+                        # The point of interest is written as a focal region whose two
+                        # corners are the same point.
+                        focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(
+                            image.get('poi').get('x'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(
+                            image.get('poi').get('x'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(
+                            image.get('poi').get('y'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(
+                            image.get('poi').get('y'))
+            except Exception as ex:
+                __class__.handle_exception(item, ex)
+                continue
+        return Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'),
+                        mimetype='application/atom+xml')
+    @staticmethod
+    def generate_rss_feed(response, token=None):
+        """Build an RSS 2.0 feed from the items in ``response``.
+
+        :param response: mapping whose ``_items`` list holds the search hits; the ``_id`` of
+            each hit is used to load the complete item from the ``items`` resource service.
+        :param token: optional token added to the item and asset URLs.
+        :return: Flask ``Response`` with mimetype ``application/rss+xml``.
+
+        An item whose feature media fails the permission check is left out. An exception
+        raised while processing one item is passed to ``handle_exception`` and processing
+        continues with the next item.
+        """
+        XML_ROOT = ''
+        _message_nsmap = {'dcterms': 'http://purl.org/dc/terms/', 'media': 'http://search.yahoo.com/mrss/',
+                          'dc': 'http://purl.org/dc/elements/1.1/',
+                          'mi': 'http://schemas.ingestion.microsoft.com/common/',
+                          'content': 'http://purl.org/rss/1.0/modules/content/'}
+
+        feed = etree.Element('rss', attrib={'version': '2.0'}, nsmap=_message_nsmap)
+        channel = SubElement(feed, 'channel')
+        SubElement(channel, 'title').text = '{} RSS Feed'.format(app.config['SITE_NAME'])
+        SubElement(channel, 'description').text = '{} RSS Feed'.format(app.config['SITE_NAME'])
+        feed_url = url_for('syndicate.get_syndicate_feed',
+                           syndicate_type='syndicate',
+                           _external=True,
+                           formatter='rss')
+        SubElement(channel, 'link').text = feed_url
+
+        item_resource = get_resource_service('items')
+        for item in response['_items']:
+            # Reset for every item so a rendition found for an earlier item can never be
+            # attached to this one.
+            image = None
+            try:
+                complete_item = item_resource.find_one(req=None, _id=item.get('_id'))
+
+                # Items whose feature media is not permitted are left out of the feed.
+                if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'):
+                    if not check_featuremedia_association_permission(complete_item):
+                        continue
+                remove_unpermissioned_embeds(complete_item, g.user, 'news_api')
+
+                entry = SubElement(channel, 'item')
+
+                # The first ancestor, when there is one, is used as the guid.
+                ancestors = complete_item.get('ancestors')
+                if ancestors:
+                    SubElement(entry, 'guid').text = ancestors[0]
+                else:
+                    SubElement(entry, 'guid').text = complete_item.get('_id')
+
+                SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline'))
+                SubElement(entry, 'pubDate').text = __class__._format_date_publish(
+                    complete_item.get('firstpublished'))
+                SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'modified')).text = \
+                    __class__._format_update_date(complete_item.get('versioncreated'))
+
+                if token:
+                    SubElement(entry, 'link').text = url_for('news/item.get_item',
+                                                             item_id=item.get('_id'),
+                                                             format='TextFormatter',
+                                                             token=token,
+                                                             _external=True)
+                else:
+                    SubElement(entry, 'link').text = url_for('news/item.get_item',
+                                                             item_id=item.get('_id'),
+                                                             format='TextFormatter',
+                                                             _external=True)
+
+                # Creator: the byline, followed by the source when it differs from the copyright
+                # holder; without a byline, the source or else the copyright holder.
+                if complete_item.get('byline'):
+                    name = complete_item.get('byline')
+                    if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == \
+                            complete_item.get('source', '').lower():
+                        name = name + " - " + complete_item.get('source')
+                    SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = name
+                else:
+                    # The config subscript was cut off here; completed with the key used above.
+                    SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = \
+                        complete_item.get('source') if complete_item.get('source') else app.config['COPYRIGHT_HOLDER']
+
+                SubElement(entry, 'source',
+                           attrib={'url': feed_url}).text = \
+                    complete_item.get('source', '')
+
+                if complete_item.get('pubstatus') == 'usable':
+                    # The period declares scheme=W3C-DTF, so the start date is written with
+                    # the ISO formatter rather than the RFC 2822 ``pubDate`` formatter.
+                    SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
+                        'start={}; end={}; scheme=W3C-DTF'.format(
+                            __class__._format_date(complete_item.get('firstpublished')),
+                            __class__._format_date(utcnow() + timedelta(days=30)))
+                else:
+                    # in effect a kill set the end date into the past
+                    SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \
+                        'start={}; end={}; scheme=W3C-DTF'.format(
+                            __class__._format_date(utcnow()),
+                            __class__._format_date(utcnow() - timedelta(days=30)))
+
+                category_names = [s.get('name') for s in complete_item.get('service', [])] \
+                    + [s.get('name') for s in complete_item.get('subject', [])] \
+                    + [s.get('name') for s in complete_item.get('place', [])] \
+                    + [k for k in complete_item.get('keywords', [])]
+                for category_name in category_names:
+                    SubElement(entry, 'category').text = category_name
+
+                SubElement(entry, 'description').text = etree.CDATA(complete_item.get('description_text', ''))
+
+                update_embed_urls(complete_item, token)
+
+                SubElement(entry, etree.QName(_message_nsmap.get('content'), 'encoded')).text = etree.CDATA(
+                    complete_item.get('body_html', ''))
+
+                featuremedia = (complete_item.get('associations') or {}).get('featuremedia') or {}
+                if featuremedia.get('renditions'):
+                    image = featuremedia.get('renditions').get("16-9")
+                if image:
+                    metadata = featuremedia
+                    url = url_for('assets.get_item', _external=True, asset_id=image.get('media'),
+                                  token=token) if token else url_for(
+                        'assets.get_item', _external=True, asset_id=image.get('media'))
+
+                    media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'),
+                                       attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'})
+
+                    SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get(
+                        'byline')
+                    SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get(
+                        'description_text')
+                    SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get(
+                        'body_text')
+                    if image.get('poi'):
+                        # The point of interest is written as a focal region whose two
+                        # corners are the same point.
+                        focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(
+                            image.get('poi').get('x'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(
+                            image.get('poi').get('x'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(
+                            image.get('poi').get('y'))
+                        SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(
+                            image.get('poi').get('y'))
+            except Exception as ex:
+                __class__.handle_exception(item, ex)
+                continue
+        return Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'),
+                        mimetype='application/rss+xml')
+    @staticmethod
+    def handle_exception(item, ex):
+        """Log ``ex`` through ``logging.exception``, prefixed with the ``_id`` of ``item``."""
+        item_id = item.get('_id')
+        logging.exception(f"Processing {item_id} - {str(ex)}")
diff --git a/newsroom/news_api/news/syndicate/syndicate_handlers.py b/newsroom/news_api/news/syndicate/syndicate_handlers.py
new file mode 100644
index 000000000..934449abe
--- /dev/null
+++ b/newsroom/news_api/news/syndicate/syndicate_handlers.py
@@ -0,0 +1,41 @@
+from collections import defaultdict
+from .service import NewsAPISyndicateService
+from flask import make_response, jsonify
+from .error_handlers import process_error_response
+def convert_to_syndicate(data, formatter):
+    """Render ``data`` in the format named by ``formatter``.
+
+    :param data: search result passed to the feed generator or to ``jsonify``.
+    :param formatter: ``'ATOM'``, ``'RSS'`` or ``'JSON'``.
+    :raises ValueError: for any other formatter value.
+    """
+    # remove token from requirements
+    if formatter == 'ATOM':
+        return NewsAPISyndicateService.generate_atom_feed(data)
+    if formatter == 'RSS':
+        return NewsAPISyndicateService.generate_rss_feed(data)
+    if formatter == 'JSON':
+        return jsonify(data)
+    raise ValueError("Invalid formatter specified")
+# Handler table keyed by formatter name in upper case. A key that is not listed falls
+# back to ``handle_unsupported_format``.
+# NOTE(review): a ``defaultdict`` stores the fallback under every missing key it is
+# asked for, so lookups with caller-supplied keys make the table grow - confirm this
+# is acceptable.
+FORMAT_HANDLERS = defaultdict(
+    lambda: {'handler': handle_unsupported_format, 'content_type': 'application/json'},
+    {
+        'ATOM': {'handler': convert_to_syndicate, 'content_type': 'application/xml'},
+        'RSS': {'handler': convert_to_syndicate, 'content_type': 'application/xml'},
+        'JSON': {'handler': convert_to_syndicate, 'content_type': 'application/json'},
+    }
+)
+
+# Feed generators keyed by feed type in lower case, with the same fallback handler.
+FEED_GENERATORS = defaultdict(
+    lambda: handle_unsupported_format,
+    {
+        'atom': NewsAPISyndicateService.generate_atom_feed,
+        'rss': NewsAPISyndicateService.generate_rss_feed,
+    }
+)
+def handle_unsupported_format(data, formatter=None):
+    """Fallback handler: answer with ``data`` as JSON, or with a 400 error description.
+
+    ``data`` is returned through ``jsonify`` when ``formatter`` is empty or ``'JSON'``;
+    any other formatter produces the error response.
+    """
+    if not formatter or formatter == 'JSON':
+        return jsonify(data)
+    error_message = f"Unsupported formatter: {formatter if formatter is not None else 'empty value'} "
+    error_response = make_response(jsonify({'error': error_message}), 400)
+    return process_error_response(error_response)
diff --git a/newsroom/news_api/settings.py b/newsroom/news_api/settings.py
index 3bb4c3849..e327c0e9b 100644
--- a/newsroom/news_api/settings.py
+++ b/newsroom/news_api/settings.py
@@ -12,6 +12,7 @@
+ 'newsroom.news_api.news.syndicate',
@@ -21,8 +22,6 @@
- 'newsroom.news_api.news.atom.atom',
- 'newsroom.news_api.news.rss.rss',
diff --git a/newsroom/static/poster_default.jpg b/newsroom/static/poster_default.jpg
new file mode 100644
index 000000000..c4e4e7bdd
Binary files /dev/null and b/newsroom/static/poster_default.jpg differ
diff --git a/newsroom/upload.py b/newsroom/upload.py
index 3d179b6b7..6da0c9f46 100644
--- a/newsroom/upload.py
+++ b/newsroom/upload.py
@@ -1,21 +1,36 @@
import flask
import newsroom
import bson.errors
from werkzeug.wsgi import wrap_file
+from werkzeug.http import parse_range_header
from werkzeug.utils import secure_filename
from flask import request, url_for, current_app as newsroom_app
from superdesk.upload import upload_url as _upload_url
from superdesk import get_resource_service
from newsroom.decorator import login_required
cache_for = 3600 * 24 * 7 # 7 days cache
blueprint = flask.Blueprint(ASSETS_RESOURCE, __name__)
+class MediaFileLoader:
+    """Class-level cache of video media file objects, keyed by media id.
+
+    Only files whose content type contains ``video`` are kept; everything else is
+    fetched from media storage on every call.
+    """
+
+    # NOTE(review): entries are never evicted, and the same file object is handed to
+    # every request for that id - confirm this is safe with concurrent readers.
+    _loaded_files = {}
+
+    @classmethod
+    def get_media_file(cls, media_id):
+        """Return the media file for ``media_id``, or whatever falsy value storage gives for a miss."""
+        cached = cls._loaded_files.get(media_id)
+        if cached is not None:
+            # A previous response may have left the read position anywhere in the file;
+            # rewind so a full (non-range) read starts at the first byte again.
+            cached.seek(0)
+            return cached
+
+        media_file = flask.current_app.media.get(media_id, ASSETS_RESOURCE)
+        # ``content_type`` can be empty (the view falls back to a default for that
+        # case), so guard the substring test.
+        if media_file and 'video' in (media_file.content_type or ''):
+            cls._loaded_files[media_id] = media_file
+        return media_file
def get_file(key):
file = request.files.get(key)
if file:
@@ -27,19 +42,68 @@ def get_file(key):
@blueprint.route('/assets/', methods=['GET'])
def get_upload(media_id):
+ # Serves the media file for ``media_id``: aborts with 404 when it is not found, answers a
+ # single-range ``Range`` request with a 206 partial response, and otherwise streams the whole file.
+ # A User-Agent that contains "Safari" but not "Chrome" is treated as Safari; for it the
+ # file is read straight from media storage and the Range handling below is skipped.
+ is_safari = ('Safari' in request.headers.get('User-Agent', '') and 'Chrome'
+ not in request.headers.get('User-Agent', ''))
- media_file = flask.current_app.media.get(media_id, ASSETS_RESOURCE)
+ if is_safari:
+ media_file = flask.current_app.media.get(media_id, ASSETS_RESOURCE)
+ else:
+ media_file = MediaFileLoader.get_media_file(media_id)
except bson.errors.InvalidId:
media_file = None
if not media_file:
- flask.abort(404)
- data = wrap_file(flask.request.environ, media_file, buffer_size=1024 * 256)
- response = flask.current_app.response_class(
- data,
- mimetype=media_file.content_type,
- direct_passthrough=True)
- response.content_length = media_file.length
+ flask.abort(404, description="File not found")
+ file_size = media_file.length
+ content_type = media_file.content_type or 'application/octet-stream'
+ range_header = request.headers.get('Range')
+ if not is_safari and range_header:
+ try:
+ ranges = parse_range_header(range_header)
+ # Only a request for exactly one range is served; anything else is answered with 416.
+ if ranges and len(ranges.ranges) == 1:
+ start, end = ranges.ranges[0]
+ # NOTE(review): a suffix range such as "bytes=-500" presumably arrives here as a
+ # negative start, and a start beyond the file size is not rejected - confirm both cases.
+ if start is None:
+ flask.abort(416, description="Invalid range header")
+ # Clamp an open-ended or oversized end to the last byte of the file.
+ if end is None or end >= file_size:
+ end = file_size - 1
+ length = end - start + 1
+ def range_generate():
+ # Yields the requested byte range in chunks of at most 8192 bytes.
+ media_file.seek(start)
+ remaining = length
+ chunk_size = 8192
+ while remaining:
+ chunk = media_file.read(min(chunk_size, remaining))
+ if not chunk:
+ break
+ remaining -= len(chunk)
+ yield chunk
+ response = flask.Response(
+ flask.stream_with_context(range_generate()),
+ 206,
+ mimetype=content_type,
+ direct_passthrough=True,
+ )
+ response.headers.add('Content-Range', f'bytes {start}-{end}/{file_size}')
+ response.headers.add('Accept-Ranges', 'bytes')
+ response.headers.add('Content-Length', str(length))
+ else:
+ flask.abort(416, description="Requested range not satisfiable")
+ except ValueError:
+ flask.abort(400, description="Invalid range header")
+ else:
+ # No Range header (or Safari): stream the whole file.
+ data = wrap_file(flask.request.environ, media_file, buffer_size=1024 * 256)
+ response = flask.current_app.response_class(
+ data,
+ mimetype=media_file.content_type,
+ direct_passthrough=True)
+ response.content_length = media_file.length
+ response.headers['Access-Control-Allow-Origin'] = '*'
+ response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+ response.headers.pop('Content-Disposition', None)
+ # NOTE(review): this header forbids caching, while the statements that follow set
+ # max_age and public on response.cache_control - confirm which caching policy is intended.
+ response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.last_modified = media_file.upload_date
response.cache_control.max_age = cache_for
@@ -47,15 +111,17 @@ def get_upload(media_id):
response.cache_control.public = True
- if flask.request.args.get('filename'):
- response.headers['Content-Type'] = media_file.content_type
- response.headers['Content-Disposition'] = 'attachment; filename="%s"' % flask.request.args['filename']
+ # A ``filename`` query argument is echoed into the attachment disposition.
+ if request.args.get('filename'):
+ response.headers['Content-Disposition'] = f'attachment; filename="{request.args["filename"]}"'
response.headers['Content-Disposition'] = 'inline'
item_id = request.args.get('item_id')
if item_id:
- get_resource_service('history').log_media_download(item_id, media_id)
+ # A failure to record the download is logged and does not fail the response.
+ try:
+ get_resource_service('history').log_media_download(item_id, media_id)
+ except Exception as e:
+ newsroom_app.logger.error(f"Error logging media download: {str(e)}")
return response
diff --git a/newsroom/wire/block_media/company_factory.py b/newsroom/wire/block_media/company_factory.py
new file mode 100644
index 000000000..cc8d522f1
--- /dev/null
+++ b/newsroom/wire/block_media/company_factory.py
@@ -0,0 +1,84 @@
+import time
+from flask import session, g
+from superdesk import get_resource_service
+class CompanyFactory:
+    """Resolve a user's company and its embedded-media permission flags.
+
+    Company documents are kept in a class-level cache for a short time, and the
+    company's ``embedded`` settings are mirrored into the Flask session under
+    the key ``embedded_data_<company id>``.
+    """
+
+    # company id -> {'company': <company document>, 'timestamp': <time.time() at fetch>}
+    _company_cache = {}
+    # A cached company older than this many seconds is fetched again.
+    _cache_expiration_time = 30
+    # Flags used when no ``embedded`` configuration is available.
+    _default_embedded = {
+        "social_media_display": False,
+        "video_display": False,
+        "audio_display": False,
+        "images_display": False,
+        "all_display": True,
+        "social_media_download": False,
+        "video_download": False,
+        "audio_download": False,
+        "images_download": False,
+        "all_download": False,
+        "sdpermit_display": False,
+        "sdpermit_download": False
+    }
+
+    @staticmethod
+    def get_user_company(user):
+        """Return the company document of ``user``.
+
+        :param user: user dict; its ``company`` value is used as the company id.
+        :return: the company document, ``[]`` when the user is missing or has no
+            company, or ``None`` when no company could be found.
+        """
+        # Guard against a missing user as well as a user without a company.
+        if not user or not user.get('company'):
+            return []
+        company_id = user['company']
+        current_time = time.time()
+        cached_data = CompanyFactory._company_cache.get(company_id)
+        if cached_data and current_time - cached_data['timestamp'] < CompanyFactory._cache_expiration_time:
+            return cached_data['company']
+        company = get_resource_service('companies').find_one(req=None, _id=company_id)
+        if company:
+            CompanyFactory._company_cache[company_id] = {
+                'company': company,
+                'timestamp': current_time
+            }
+            CompanyFactory._update_embedded_data_in_session(user, company)
+            return company
+        # Fallback lookup keyed by ``g.user``.
+        # NOTE(review): this queries companies with a user id - confirm it is intended.
+        company = get_resource_service('companies').find_one(req=None, _id=g.user) if hasattr(g, 'user') else None
+        if company:
+            CompanyFactory._company_cache[g.user] = {
+                'company': company,
+                'timestamp': current_time
+            }
+            # The session helper indexes ``user['company']``, so it needs the user dict, not ``g.user``.
+            CompanyFactory._update_embedded_data_in_session(user, company)
+        return company
+
+    @staticmethod
+    def get_embedded_data(user):
+        """Return the embedded-media settings that apply to ``user``.
+
+        :param user: user dict, as accepted by :meth:`get_user_company`.
+        :return: the company's ``embedded`` dict, or the default flags nested
+            under an ``"embedded"`` key when no company is resolved.
+        """
+        company = CompanyFactory.get_user_company(user)
+        if not company:
+            # NOTE(review): this branch nests the flags under "embedded" while the
+            # normal path returns them flat - confirm callers expect both shapes.
+            return {"embedded": dict(CompanyFactory._default_embedded)}
+        embedded = session.get(f"embedded_data_{user['company']}", {})
+        if embedded != company.get("embedded", {}):
+            # The session copy is stale: refresh it and answer from the company document.
+            CompanyFactory._update_embedded_data_in_session(user, company)
+            embedded = company.get("embedded", {})
+        return embedded
+
+    @staticmethod
+    def _update_embedded_data_in_session(user, company):
+        """Store the company's ``embedded`` settings (or the defaults) in the session."""
+        session[f"embedded_data_{user['company']}"] = company.get(
+            "embedded", dict(CompanyFactory._default_embedded))
+        session.permanent = False
+        session.modified = True
diff --git a/newsroom/wire/block_media/download_items.py b/newsroom/wire/block_media/download_items.py
new file mode 100644
index 000000000..765476d60
--- /dev/null
+++ b/newsroom/wire/block_media/download_items.py
@@ -0,0 +1,105 @@
+from newsroom.auth import get_user
+from newsroom.wire.block_media.company_factory import CompanyFactory
+from newsroom.wire.block_media.filter_media import get_allowed_tags
+from lxml import html as lxml_html
+import re
+import logging
+logger = logging.getLogger(__name__)
+def filter_items_download(func):
+    """
+    A decorator that adds an optional post-filter to an item download function.
+
+    :param func: The function to be decorated. It should take _ids and item_type as parameters
+                 and return a list of items.
+    :return: A wrapper function that adds filtering capability to the decorated function.
+    """
+    # Imported locally so the block does not depend on the module's import list.
+    from functools import wraps
+
+    @wraps(func)  # keep the decorated function's name and docstring
+    def wrapper(_ids, item_type, filter_func=None):
+        """
+        Call the decorated function and apply the optional filter to its result.
+
+        :param _ids: List of IDs to download items for.
+        :param item_type: Type of items to download.
+        :param filter_func: Optional function to filter the downloaded items.
+                            Default is None, in which case no filtering is applied.
+        :return: A list of downloaded items, filtered when a filter_func is provided,
+                 the list is not empty and the item_type is not 'agenda'.
+        """
+        items = func(_ids, item_type)
+        if filter_func and items and (item_type != 'agenda'):
+            items = filter_func(items)
+        return items
+    return wrapper
+def block_items_by_embedded_data(items):
+ # Filter for downloaded items: for each item, elements marked
+ # data-disable-download="true" are removed from body_html and
+ # "editor_*" associations of a type that is not allowed are dropped.
+ # Returns a new list holding the (mutated) input items.
+ # NOTE(review): indentation was lost in this patch text, so the nesting of the
+ # `break` statements below cannot be read off - confirm against the real file.
+ def remove_editors_media(item, allowed_tags):
+ # Drop every "editor_*" association whose `type` is not in allowed_tags;
+ # the 'img' tag is matched against the association type 'picture'.
+ associations = item.get("associations")
+ if associations:
+ editors_to_remove = []
+ allowed_tags = ['picture' if tag == 'img' else tag for tag in allowed_tags]
+ for key, value in associations.items():
+ if key.startswith("editor_") and ((value and value.get("type") not in allowed_tags)):
+ editors_to_remove.append(key)
+ for editor in editors_to_remove:
+ associations.pop(editor, None)
+ item["associations"] = associations
+ return item
+ download_social_tag = False
+ user = get_user(required=True)
+ embedded_data = CompanyFactory.get_embedded_data(user)
+ embedded_tags = get_allowed_tags(embedded_data)
+ allowed_tags = embedded_tags['download_tags']
+ # 'all', or no download flag set at all, means every media kind may be downloaded.
+ if 'all' in allowed_tags or (not any(allowed_tags)):
+ allowed_tags = ['video', 'audio', 'img', 'social_media']
+ download_social_tag = True
+ if 'social_media' in allowed_tags:
+ download_social_tag = True
+ filtered_items = []
+ for item in items:
+ html_updated = False
+ # NOTE(review): the default body is '' - confirm lxml accepts an empty document here.
+ root_elem = lxml_html.fromstring(item.get('body_html', ''))
+ if allowed_tags:
+ tag_map = {'video': 'Video', 'audio': 'Audio', 'img': 'Image', 'social_media': 'social_media'}
+ excluded_tags = set(tag_map.keys()) - set(allowed_tags)
+ regex_parts = [tag_map[tag] for tag in excluded_tags]
+ # Matches "EMBED START <kind> {id: "editor_N" comments for the excluded kinds only.
+ # NOTE(review): with nothing excluded the alternation is empty - verify that is harmless.
+ regex = rf" EMBED START (?:{'|'.join(regex_parts)}) {{id: \"editor_([0-9]+)"
+ comments = root_elem.xpath('//comment()')
+ for comment in comments:
+ m = re.search(regex, comment.text)
+ if m and m.group(1):
+ # The element following the comment is treated as the embed's container.
+ # NOTE(review): getnext() may return None; there is no guard before iterchildren().
+ figure = comment.getnext()
+ for elem in figure.iterchildren():
+ if (elem.tag in excluded_tags
+ and ('data-disable-download' not in elem.attrib
+ or elem.attrib['data-disable-download'] != 'true')):
+ elem.attrib['data-disable-download'] = 'true'
+ html_updated = True
+ break
+ if not download_social_tag:
+ # Social media embeds: tag the block as disabled and mark its blockquote for removal.
+ social_media_embeds = root_elem.xpath('//div[@class="embed-block"]')
+ for social_media_embed in social_media_embeds:
+ if 'disabled-embed' not in social_media_embed.attrib.get('class', ''):
+ social_media_embed.attrib['class'] = social_media_embed.attrib.get('class', '') + ' disabled-embed'
+ blockquote_elements = social_media_embed.xpath('.//blockquote')
+ for blockquote in blockquote_elements:
+ if 'data-disable-download' not in blockquote.attrib:
+ blockquote.attrib['data-disable-download'] = 'true'
+ html_updated = True
+ break
+ if html_updated:
+ # Remove every element marked above, then write the serialised tree back to the item.
+ for elem in root_elem.xpath('//*[@data-disable-download="true"]'):
+ elem.getparent().remove(elem)
+ item["body_html"] = lxml_html.tostring(root_elem, encoding='unicode', method="html")
+ item_remove = remove_editors_media(item, allowed_tags)
+ filtered_items.append(item_remove)
+ return filtered_items
diff --git a/newsroom/wire/block_media/filter_htmlpackage.py b/newsroom/wire/block_media/filter_htmlpackage.py
new file mode 100644
index 000000000..6fe5761c7
--- /dev/null
+++ b/newsroom/wire/block_media/filter_htmlpackage.py
@@ -0,0 +1,14 @@
+from functools import wraps
+import flask
+from newsroom.auth import get_user
+from newsroom.wire.block_media.company_factory import CompanyFactory
+def filter_embedded_data(func):
+    """Decorate a formatter method taking ``(self, item, item_type='items')``.
+
+    When the current user's embedded settings are non-empty, the item is
+    rendered through the ``download_embed.html`` template and returned as
+    UTF-8 bytes; otherwise the decorated method produces the result.
+    """
+    @wraps(func)
+    def wrapper(self, item, item_type='items'):
+        current_user = get_user(required=True)
+        company_settings = CompanyFactory.get_embedded_data(current_user)
+        if not any(company_settings):
+            return func(self, item, item_type)
+        rendered = flask.render_template('download_embed.html', item=item)
+        return str.encode(rendered, 'utf-8')
+    return wrapper
diff --git a/newsroom/wire/block_media/filter_media.py b/newsroom/wire/block_media/filter_media.py
new file mode 100644
index 000000000..2de6d29b5
--- /dev/null
+++ b/newsroom/wire/block_media/filter_media.py
@@ -0,0 +1,169 @@
+from functools import wraps
+from flask import current_app as app
+from newsroom.auth import get_user
+from newsroom.wire.block_media.company_factory import CompanyFactory
+from lxml import html as lxml_html
+import re
+import logging
+from superdesk.etree import to_string
+logger = logging.getLogger(__name__)
+def filter_media(func):
+    """Decorator that rewrites an item's embeds before calling ``func``.
+
+    The item is processed in place only when ``EMBED_PRODUCT_FILTERING`` is
+    enabled, an item argument can be located and at least one embedded
+    setting is truthy. ``func`` is always called with its original arguments.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if app.config.get("EMBED_PRODUCT_FILTERING"):
+            target_item = get_item_argument(args, kwargs)
+            if target_item is not None:
+                settings = get_embedded_data()
+                if any(settings.values()):
+                    # Mutates target_item, which is one of the arguments passed on below.
+                    process_item_embeds(target_item, settings)
+        return func(*args, **kwargs)
+    return wrapper
+def get_item_argument(args, kwargs):
+    """Locate the item dict among a call's arguments.
+
+    The second positional argument wins when it is a dict with a
+    ``body_html`` key; otherwise the first such positional argument is used,
+    and finally the ``item`` keyword argument (``None`` when absent).
+    """
+    def looks_like_item(candidate):
+        return isinstance(candidate, dict) and 'body_html' in candidate
+
+    if len(args) > 1 and looks_like_item(args[1]):
+        return args[1]
+    first_match = next((candidate for candidate in args if looks_like_item(candidate)), None)
+    if first_match is not None:
+        return first_match
+    return kwargs.get('item')
+def get_embedded_data():
+    """Return the embedded-media settings for the current user.
+
+    :return: whatever ``CompanyFactory.get_embedded_data`` returns for the
+        current user, or an empty dict when the lookup raises.
+    """
+    try:
+        user = get_user(required=True)
+        return CompanyFactory.get_embedded_data(user)
+    except Exception as e:
+        # Best effort: callers fall back to "no filtering", but keep the traceback in the log.
+        logger.exception(f"Error loading embedded data: {str(e)}")
+        return {}
+def process_item_embeds(item_arg, embedded_data):
+    """Apply the embed restrictions to an item's HTML, in place.
+
+    Both ``body_html`` and the first ``es_highlight['body_html']`` entry are
+    parsed, passed to ``process_allowed_tags`` and written back when that
+    function reports a change.
+
+    :param item_arg: item dict; modified in place.
+    :param embedded_data: settings dict passed to ``get_allowed_tags``.
+    :return: the same ``item_arg`` object.
+    """
+    allowed_tags = get_allowed_tags(embedded_data)
+
+    # lxml refuses to parse an empty document, so skip missing or blank markup.
+    html_string = item_arg.get('body_html') or ''
+    if html_string.strip():
+        root_elem = lxml_html.fromstring(html_string)
+        if process_allowed_tags(root_elem, allowed_tags):
+            item_arg["body_html"] = to_string(root_elem, method="html")
+
+    es_highlight = item_arg.get('es_highlight') or {}
+    es_highlight_body_html = es_highlight.get('body_html') or []
+    if len(es_highlight_body_html) > 0:
+        es_highlight_html_string = es_highlight_body_html[0] or ''
+        if es_highlight_html_string.strip():
+            es_highlight_root_elem = lxml_html.fromstring(es_highlight_html_string)
+            if process_allowed_tags(es_highlight_root_elem, allowed_tags):
+                item_arg['es_highlight']['body_html'][0] = to_string(es_highlight_root_elem, method="html")
+    return item_arg
+def get_allowed_tags(embedded_data):
+    """Translate the embedded settings into lists of allowed tag names.
+
+    :param embedded_data: dict of ``*_display`` / ``*_download`` flags.
+    :return: ``{'display_tags': [...], 'download_tags': [...]}`` with the tags
+        whose flag is truthy, in the fixed order listed below.
+    """
+    # (tag, display setting, download setting)
+    rules = (
+        ('video', 'video_display', 'video_download'),
+        ('audio', 'audio_display', 'audio_download'),
+        ('img', 'images_display', 'images_download'),
+        ('all', 'all_display', 'all_download'),
+        ('social_media', 'social_media_display', 'social_media_download'),
+        ('sd', 'sdpermit_display', 'sdpermit_download'),
+    )
+    display_tags = []
+    download_tags = []
+    for tag, display_key, download_key in rules:
+        if embedded_data.get(display_key, False):
+            display_tags.append(tag)
+        if embedded_data.get(download_key, False):
+            download_tags.append(tag)
+    return {'display_tags': display_tags, 'download_tags': download_tags}
+def process_allowed_tags(root_elem, allowed_tags):
+ # Mark embeds in the parsed HTML tree that may not be displayed or downloaded.
+ # allowed_tags is a dict with 'display_tags' and 'download_tags' lists.
+ # Returns html_updated, which is set to True whenever an attribute is written.
+ # NOTE(review): indentation was lost in this patch text, so the nesting of the
+ # `break` statements below cannot be read off - confirm against the real file.
+ html_updated = False
+ display_social_tag = False
+ download_social_tag = False
+ display_tags = allowed_tags['display_tags']
+ # 'all', or an empty list, widens the set to every media kind.
+ if 'all' in display_tags or (not any(display_tags)):
+ display_tags = ['video', 'audio', 'img', 'social_media']
+ display_social_tag = True
+ if 'social_media' in display_tags:
+ display_social_tag = True
+ download_tags = allowed_tags['download_tags']
+ if 'all' in download_tags or (not any(download_tags)):
+ download_tags = ['video', 'audio', 'img', 'social_media']
+ download_social_tag = True
+ if 'social_media' in download_tags:
+ download_social_tag = True
+ tag_map = {'video': 'Video', 'audio': 'Audio', 'img': 'Image'}
+ # Each regex matches "EMBED START <kind> {id: "editor_N" comments for the kinds
+ # that are NOT allowed for display / download respectively.
+ # NOTE(review): when every kind is allowed the alternation is empty - verify that is harmless.
+ display_regex_parts = ['|'.join(tag_map[tag] for tag in tag_map if tag not in display_tags)]
+ display_regex = rf" EMBED START (?:{'|'.join(display_regex_parts)}) {{id: \"editor_([0-9]+)"
+ download_regex_parts = ['|'.join(tag_map[tag] for tag in tag_map if tag not in download_tags)]
+ download_regex = rf" EMBED START (?:{'|'.join(download_regex_parts)}) {{id: \"editor_([0-9]+)"
+ comments = root_elem.xpath('//comment()')
+ for comment in comments:
+ display_match = re.search(display_regex, comment.text)
+ download_match = re.search(download_regex, comment.text)
+ if display_match and display_match.group(1):
+ # NOTE(review): getnext() may return None; it is only checked further down,
+ # after this use of figure.iterchildren().
+ figure = comment.getnext()
+ for elem in figure.iterchildren():
+ if elem.tag not in display_tags:
+ # Display is blocked by replacing the container's class.
+ figure.attrib['class'] = 'disabled-embed'
+ html_updated = True
+ break
+ figure = comment.getnext()
+ if figure is None:
+ continue
+ if download_match and download_match.group(1):
+ for elem in figure.iterchildren():
+ if elem.tag not in download_tags:
+ elem.attrib['data-disable-download'] = 'true'
+ html_updated = True
+ break
+ if not display_social_tag:
+ # Social media embeds are div.embed-block elements; display is blocked via the class.
+ social_media_embeds = root_elem.xpath('//div[@class="embed-block"]')
+ for social_media_embed in social_media_embeds:
+ social_media_embed.attrib['class'] = 'embed-block disabled-embed'
+ html_updated = True
+ if not download_social_tag:
+ social_media_embeds = root_elem.xpath('//div[@class="embed-block"]')
+ for social_media_embed in social_media_embeds:
+ blockquote_elements = social_media_embed.xpath('.//blockquote')
+ for blockquote in blockquote_elements:
+ blockquote.attrib['data-disable-download'] = 'true'
+ html_updated = True
+ break
+ return html_updated
diff --git a/newsroom/wire/formatters/downloadninjs.py b/newsroom/wire/formatters/downloadninjs.py
index 6d3b54fd7..793214111 100644
--- a/newsroom/wire/formatters/downloadninjs.py
+++ b/newsroom/wire/formatters/downloadninjs.py
@@ -13,21 +13,27 @@ def __init__(self):
self.direct_copy_properties += ('associations',)
def rewire_embeded_images(self, item):
def _get_source_ref(marker, item):
widest = -1
src_rendition = ""
- for rendition in item.get("associations").get(marker).get("renditions"):
- width = item.get("associations").get(marker).get("renditions").get(rendition).get("width")
- if width > widest:
- widest = width
- src_rendition = rendition
+ associations = item.get("associations")
+ if associations:
+ marker_association = associations.get(marker)
+ if marker_association:
+ renditions = marker_association.get("renditions")
+ if renditions:
+ for rendition in renditions:
+ width = renditions.get(rendition, {}).get("width")
+ if width and width > widest:
+ widest = width
+ src_rendition = rendition
- if widest > 0:
- return item.get("associations").get(marker).get("renditions").get(src_rendition).get("href").lstrip('/')
+ if widest > 0 and src_rendition:
+ href = associations.get(marker, {}).get("renditions", {}).get(src_rendition, {}).get("href")
+ if href:
+ return href.lstrip('/')
- logger.warning(
- "href not found for the original in NINJSDownload formatter")
+ logger.warning("href not found for the original in NINJSDownload formatter")
return None
def _get_source_set_refs(marker, item):
@@ -38,13 +44,17 @@ def _get_source_set_refs(marker, item):
srcset = []
- for rendition in item.get("associations").get(marker).get("renditions"):
- srcset.append(
- item.get("associations").get(marker).get("renditions").get(rendition).get("href").lstrip('/')
- + " "
- + str(item.get("associations").get(marker).get("renditions").get(rendition).get("width"))
- + "w"
- )
+ associations = item.get("associations")
+ if associations:
+ marker_association = associations.get(marker)
+ if marker_association:
+ renditions = marker_association.get("renditions")
+ if renditions:
+ for rendition in renditions:
+ href = renditions.get(rendition, {}).get("href")
+ width = renditions.get(rendition, {}).get("width")
+ if href and width:
+ srcset.append(href.lstrip('/') + " " + str(width) + "w")
return ",".join(srcset)
def update_image(item, elem, group):
diff --git a/newsroom/wire/formatters/htmlwithmedia.py b/newsroom/wire/formatters/htmlwithmedia.py
index 4d5be1d75..7a5a712b7 100644
--- a/newsroom/wire/formatters/htmlwithmedia.py
+++ b/newsroom/wire/formatters/htmlwithmedia.py
@@ -3,6 +3,7 @@
from .utils import remove_internal_renditions, log_media_downloads, remove_unpermissioned_embeds
from newsroom.utils import update_embeds_in_body
from ...upload import ASSETS_RESOURCE
import base64
diff --git a/newsroom/wire/formatters/utils.py b/newsroom/wire/formatters/utils.py
index 2b5267201..6a4a7a84e 100644
--- a/newsroom/wire/formatters/utils.py
+++ b/newsroom/wire/formatters/utils.py
@@ -40,22 +40,39 @@ def remove_internal_renditions(item, remove_media=False):
def add_media(zf, item):
- """
- Add the media files associated with the item
- :param zf: Zipfile
- :param item:
- :return:
- """
added_files = []
- for _key, associated_item in item.get('associations', {}).items():
- for rendition in associated_item.get('renditions'):
- name = associated_item.get('renditions').get(rendition).get('href').lstrip('/')
+ associations = item.get('associations', {})
+ for associated_item in associations.values():
+ if not associated_item:
+ continue
+ renditions = associated_item.get('renditions')
+ if not renditions or not isinstance(renditions, dict):
+ continue
+ for rendition_data in renditions.values():
+ if not rendition_data:
+ continue
+ name = rendition_data.get('href', '').lstrip('/')
if name in added_files:
- file = flask.current_app.media.get(associated_item.get('renditions').get(rendition).get('media'),
- zf.writestr(name, file.read())
- added_files.append(name)
+ media_id = rendition_data.get('media')
+ if not media_id:
+ flask.current_app.logger.warning(f"Media ID not found for rendition: {name}")
+ continue
+ file = flask.current_app.media.get(media_id, ASSETS_RESOURCE)
+ if not file:
+ flask.current_app.logger.warning(f"File not found: {name}")
+ continue
+ try:
+ zf.writestr(name, file.read())
+ added_files.append(name)
+ except Exception as e:
+ flask.current_app.logger.error(f"Error adding file to zip: {name}. Error: {str(e)}")
def rewire_featuremedia(item):
diff --git a/newsroom/wire/search.py b/newsroom/wire/search.py
index d587e0489..013bf2c4a 100644
--- a/newsroom/wire/search.py
+++ b/newsroom/wire/search.py
@@ -1,7 +1,6 @@
import logging
from datetime import datetime, timedelta
from copy import deepcopy
from eve.utils import ParsedRequest, config
from flask import current_app as app, json, request
from superdesk import get_resource_service
@@ -19,8 +18,7 @@
from newsroom.auth import get_user
from newsroom.companies import get_user_company
from newsroom.products.products import get_products_by_company
-from newsroom.user_roles import UserRole
+from newsroom.wire.block_media.filter_media import filter_media
logger = logging.getLogger(__name__)
@@ -54,9 +52,6 @@ class WireSearchResource(newsroom.Resource):
item_methods = ['GET']
resource_methods = ['GET']
- allowed_roles = [role for role in UserRole]
- allowed_item_roles = allowed_roles
def versioncreated_range(created):
_range = {}
@@ -189,7 +184,6 @@ def get_product_items(self, product_id, size):
search.source['post_filter'] = {'bool': {'must': []}}
internal_req = self.get_internal_request(search)
docs = list(self.internal_get(internal_req, None))
if app.config.get("EMBED_PRODUCT_FILTERING"):
for item in docs:
self.permission_embeds_in_item(item, self.get_permitted_products())
@@ -545,18 +539,12 @@ def get_permitted_products(self):
get_products_by_company(company.get('_id'), None, request.args.get('type', 'wire'))
if p.get('sd_product_id')]
+ @filter_media
def permission_embeds_in_item(self, item, permitted_products):
- """
- Given the permitted products for the current user and an item, mark any video or audio embedded elements
- that are not associated with any products that the user is allowed.
- :param item:
- :param permitted_products:
- :return:
- """
disable_download = []
for key, embed_item in item.get("associations", {}).items():
- if key.startswith("editor_") and embed_item and (embed_item.get('type', '')) in ['audio', 'video']:
- # get the list of products that the embedded item matched in Superdesk
+ if (key.startswith("editor_") and embed_item
+ and embed_item.get('type', '') in ['audio', 'video', 'picture']):
embed_products = [p.get('code') for p in
((item.get('associations') or {}).get(key) or {}).get('products', [])]
@@ -564,22 +552,24 @@ def permission_embeds_in_item(self, item, permitted_products):
if len(disable_download) == 0:
+ logger.info("No embedded items require download disabling.")
- # mark the each embed as allowed or not, except for images
root_elem = lxml_html.fromstring(item.get('body_html', ''))
- regex = r" EMBED START (?:Video|Audio) {id: \"editor_([0-9]+)"
+ regex = r" EMBED START (?:Video|Audio|Image) {id: \"editor_([0-9]+)"
html_updated = False
comments = root_elem.xpath('//comment()')
for comment in comments:
m = re.search(regex, comment.text)
- # if we've found an Embed Start comment
if m and m.group(1):
figure = comment.getnext()
for elem in figure.iterchildren():
- if elem.tag in ['video', 'audio']:
+ if elem.tag in ['video', 'audio', 'img']:
if "editor_" + m.group(1) in disable_download:
- elem.attrib['data-disable-download'] = 'true'
+ if 'data-disable-download' not in elem.attrib or \
+ elem.attrib['data-disable-download'] != 'true':
+ elem.attrib['data-disable-download'] = 'true'
+ html_updated = True
if elem.text and ' EMBED END ' in elem.text:
html_updated = True
diff --git a/newsroom/wire/utils.py b/newsroom/wire/utils.py
index 86a289cff..74c3d6c34 100644
--- a/newsroom/wire/utils.py
+++ b/newsroom/wire/utils.py
@@ -9,9 +9,9 @@ def get_picture(item):
def get_body_picture(item):
- pictures = [assoc for assoc in item.get('associations', {}).values() if assoc.get('type') == 'picture']
- if pictures:
- return pictures[0]
+ pictures = [assoc for assoc in item.get('associations', {}).values()
+ if assoc is not None and assoc.get('type') == 'picture']
+ return pictures[0] if pictures else None
def get_caption(picture):
diff --git a/newsroom/wire/views.py b/newsroom/wire/views.py
index 32801e3d2..456ac1eed 100644
--- a/newsroom/wire/views.py
+++ b/newsroom/wire/views.py
@@ -4,7 +4,6 @@
import superdesk
import json
from html import escape
from bson import ObjectId
from operator import itemgetter
from flask import current_app as app, request, jsonify, url_for
@@ -16,7 +15,6 @@
from .formatters.utils import add_media
from superdesk import get_resource_service
from newsroom.navigations.navigations import get_navigations_by_company
from newsroom.products.products import get_products_by_company
from newsroom.wire import blueprint
@@ -27,13 +25,14 @@
from newsroom.email import send_email
from newsroom.companies import get_user_company
from newsroom.utils import get_entity_or_404, get_json_or_400, parse_dates, get_type, is_json_request, query_resource, \
- get_agenda_dates, get_location_string, get_public_contacts, get_links, get_items_for_user_action
+ get_agenda_dates, get_location_string, get_public_contacts, get_links, get_items_for_user_action, \
+ get_entities_elastic_or_mongo_or_404
from newsroom.notifications import push_user_notification, push_notification
from newsroom.companies import section
from newsroom.template_filters import is_admin_or_internal
from .search import get_bookmarks_count
from ..upload import ASSETS_RESOURCE
+from newsroom.wire.block_media.download_items import filter_items_download, block_items_by_embedded_data
HOME_ITEMS_CACHE_KEY = 'home_items'
HOME_EXTERNAL_ITEMS_CACHE_KEY = 'home_external_items'
@@ -107,7 +106,7 @@ def get_items_by_card(cards):
# using '/media_card_external' endpoint
items_by_card[card['label']] = None
- app.cache.set(HOME_ITEMS_CACHE_KEY, items_by_card, timeout=300)
+ app.cache.set(HOME_ITEMS_CACHE_KEY, items_by_card, timeout=1)
return items_by_card
@@ -142,6 +141,22 @@ def get_previous_versions(item):
return []
+# The decorator supplies the optional ``filter_func`` keyword that download() passes.
+@filter_items_download
+def get_items_for_user_action_block(_ids, item_type):
+    """Fetch the items for a user action and decorate text sluglines.
+
+    :param _ids: list of item ids.
+    :param item_type: resource type passed to the lookup helper.
+    :return: the fetched items; when the first item is of type ``text``, every
+        item with both ``slugline`` and ``anpa_take_key`` gets the take key
+        appended to its slugline.
+    """
+    # Getting entities from elastic first so that we get all fields
+    # even those which are not a part of ItemsResource(content_api) schema.
+    items = get_entities_elastic_or_mongo_or_404(_ids, item_type)
+    if not items or items[0].get('type') != 'text':
+        return items
+    for item in items:
+        if item.get('slugline') and item.get('anpa_take_key'):
+            item['slugline'] = '{0} | {1}'.format(item['slugline'], item['anpa_take_key'])
+    return items
def index():
@@ -190,8 +205,7 @@ def download(_ids):
user = get_user(required=True)
_format = flask.request.args.get('format', 'text')
item_type = get_type()
- items = get_items_for_user_action(_ids.split(','), item_type)
+ items = get_items_for_user_action_block(_ids.split(','), item_type, filter_func=block_items_by_embedded_data)
_file = io.BytesIO()
formatter = app.download_formatters[_format]['formatter']
mimetype = None
@@ -224,7 +238,8 @@ def download(_ids):
for item in items:
formated_item = json.loads(formatter.format_item(item, item_type=item_type))
add_media(zf, item)
- zf.writestr(secure_filename(formatter.format_filename(item)), json.dumps(formated_item).encode('utf-8'))
+ zf.writestr(secure_filename(formatter.format_filename(item)),
+ json.dumps(formated_item).encode('utf-8'))
elif _format == 'htmlpackage':
with zipfile.ZipFile(_file, mode='w') as zf:
diff --git a/package-lock.json b/package-lock.json
index 5d789c40c..3cc8044c0 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -2447,6 +2447,11 @@
"domelementtype": "1"
+ "dompurify": {
+ "version": "3.1.6",
+ "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.6.tgz",
+ "integrity": "sha512-cTOAhc36AalkjtBpfG6O8JimdTMWNXjiePT2xQH/ppBGi/4uIpmj8eKyIkMJErXWARyINV/sB38yf8JCLF5pbQ=="
+ },
"domutils": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz",
@@ -3554,6 +3559,52 @@
"object-assign": "^4.0.1"
+ "file-loader": {
+ "version": "1.1.11",
+ "resolved": "https://registry.npmjs.org/file-loader/-/file-loader-1.1.11.tgz",
+ "integrity": "sha512-TGR4HU7HUsGg6GCOPJnFk06RhWgEWFLAGWiT6rcD+GRC2keU3s9RGJ+b3Z6/U73jwwNb2gKLJ7YCrp+jvU4ALg==",
+ "dev": true,
+ "requires": {
+ "loader-utils": "^1.0.2",
+ "schema-utils": "^0.4.5"
+ },
+ "dependencies": {
+ "ajv": {
+ "version": "6.12.6",
+ "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
+ "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
+ "dev": true,
+ "requires": {
+ "fast-deep-equal": "^3.1.1",
+ "fast-json-stable-stringify": "^2.0.0",
+ "json-schema-traverse": "^0.4.1",
+ "uri-js": "^4.2.2"
+ }
+ },
+ "fast-deep-equal": {
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
+ "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
+ "dev": true
+ },
+ "json-schema-traverse": {
+ "version": "0.4.1",
+ "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
+ "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==",
+ "dev": true
+ },
+ "schema-utils": {
+ "version": "0.4.7",
+ "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz",
+ "integrity": "sha512-v/iwU6wvwGK8HbU9yi3/nhGzP0yGSuhQMzL6ySiec1FSrZZDkhm4noOSWzrNFo/jEc+SJY6jRTwuwbSXJPDUnQ==",
+ "dev": true,
+ "requires": {
+ "ajv": "^6.1.0",
+ "ajv-keywords": "^3.1.0"
+ }
+ }
+ }
+ },
"file-uri-to-path": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
diff --git a/package.json b/package.json
index 258a7d15e..02820f73e 100644
--- a/package.json
+++ b/package.json
@@ -18,6 +18,7 @@
"bootstrap": "4.1.3",
"classnames": "^2.2.5",
"css-loader": "^0.28.5",
+ "dompurify": "^3.1.6",
"enzyme-adapter-react-16": "1.7.1",
"extract-text-webpack-plugin": "3.0.2",
"fetch-mock": "^5.12.2",
@@ -55,6 +56,7 @@
"eslint": "^4.8.0",
"eslint-plugin-react": "^7.3.0",
"expect": "^21.1.0",
+ "file-loader": "^1.1.11",
"karma": "^1.7.1",
"karma-chrome-launcher": "^2.2.0",
"karma-jasmine": "^1.1.0",
diff --git a/tests/dev-requirements.txt b/tests/dev-requirements.txt
new file mode 100644
index 000000000..d3ccd8786
--- /dev/null
+++ b/tests/dev-requirements.txt
@@ -0,0 +1,11 @@
+-r requirements.txt
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 63b19be8f..8186a7765 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -3,10 +3,11 @@
from pytest import fixture
from datetime import datetime, timedelta
from superdesk.utc import utcnow
-from tests.test_users import test_login_succeeds_for_admin, init as users_init
+from flask import url_for
PUBLIC_USER_ID = ObjectId('59b4c5c61d41c8d736852fbf')
TEST_USER_ID = ObjectId('5cc94454bc43165c045ffec9')
+ADMIN_USER_ID = '5cc94b99bc4316684dc7dc07'
items = [
@@ -216,3 +217,28 @@ def setup_user_company(app):
def init_company(app):
+def test_login_succeeds_for_admin(client):
+    """Post the admin credentials to the login view and expect a 200 after redirects."""
+    login_url = url_for('auth.login')
+    credentials = {'email': 'admin@sourcefabric.org', 'password': 'admin'}
+    response = client.post(login_url, data=credentials, follow_redirects=True)
+    assert response.status_code == 200
+def users_init(app):
+    """Insert the administrator account used by the login helper into ``users``."""
+    admin_user = {
+        '_id': ObjectId(ADMIN_USER_ID),
+        'first_name': 'admin',
+        'last_name': 'admin',
+        'email': 'admin@sourcefabric.org',
+        'password': '$2b$12$HGyWCf9VNfnVAwc2wQxQW.Op3Ejk7KIGE6urUXugpI0KQuuK6RWIG',
+        'user_type': 'administrator',
+        'is_validated': True,
+        'is_enabled': True,
+        'is_approved': True,
+        'receive_email': True,
+    }
+    app.data.insert('users', [admin_user])
diff --git a/tests/test_block_content.py b/tests/test_block_content.py
new file mode 100644
index 000000000..b5cc74ea4
--- /dev/null
+++ b/tests/test_block_content.py
@@ -0,0 +1,464 @@
+import io
+import json
+import zipfile
+from datetime import timedelta, datetime
+import re
+import bson
+import lxml.etree
+from superdesk.utc import utcnow
+from .fixtures import items, init_items, init_auth, agenda_items, init_agenda_items # noqa
+from .test_push import upload_binary
+import pytest
+items_ids = [item['_id'] for item in items[:2]]
+item = items[:2][0]
+def download_zip_file(client, _format, section):
+    """Download the first two fixture items as a zip and return it as a byte stream.
+
+    Asserts a 200 ``application/zip`` response whose ``Content-Disposition``
+    names a ``<YYYYmmddHHMM>-newsroom.zip`` file with a timestamp no older than
+    five minutes and not in the future.
+
+    :param client: test client used to issue the GET request
+    :param _format: value of the ``format`` query argument
+    :param section: value of the ``type`` query argument
+    :return: ``io.BytesIO`` wrapping the response body
+    """
+    joined_ids = ",".join(items_ids)
+    url = f'/download/{joined_ids}?format={_format}&type={section}'
+    resp = client.get(url, follow_redirects=True)
+    assert resp.status_code == 200
+    assert resp.mimetype == 'application/zip'
+
+    header = resp.headers.get('Content-Disposition')
+    assert header is not None, "Content-Disposition header is missing"
+    found = re.search(r'filename=(\d{12})-newsroom\.zip', header)
+    assert found, f"Filename in Content-Disposition does not match expected pattern: {header}"
+
+    stamp = found.group(1)
+    stamped_at = datetime.strptime(stamp, "%Y%m%d%H%M")
+    upper = datetime.utcnow()
+    lower = upper - timedelta(minutes=5)
+    assert lower <= stamped_at <= upper, f"Filename timestamp {stamp} is not within the expected range"
+    return io.BytesIO(resp.get_data())
+def text_content_test(content):
+    """Check the plain-text rendition of the downloaded item.
+
+    :param content: raw bytes of the text file; decoded as UTF-8 and split on newlines
+    """
+    lines = content.decode('utf-8').split('\n')
+    assert 'AMAZON-BOOKSTORE-OPENING' in lines[0]
+    assert 'Amazon Is Opening More Bookstores' in lines[1]
+    # No line of the output may be empty.
+    assert '' not in lines
+    assert lines[-2] == 'Block 4'
+def nitf_content_test(content):
+    """Check that ``content`` parses as an NITF document titled like the first fixture item."""
+    document = lxml.etree.fromstring(content)
+    assert document.tag == 'nitf'
+    title_node = document.find('head').find('title')
+    assert title_node.text == items[0]['headline']
+def ninjs_content_test(content):
+    """Check a ninjs download in which only the ``editor_1`` embed is retained.
+
+    :param content: JSON document (str or bytes) of the downloaded item
+    """
+    payload = json.loads(content)
+    associations = payload.get('associations', {})
+    assert associations.get('editor_1')
+    assert not associations.get('editor_0')
+    assert not associations.get('editor_2')
+    assert payload['headline'] == 'Amazon Is Opening More Bookstores'
+    body = payload['body_html']
+    assert 'editor_1' in body
+    assert 'editor_0' not in body
+def ninjs_block_download_example(content):
+    """Check a ninjs download where only the video embed (``editor_1``) is kept.
+
+    :param content: JSON document (str or bytes) of the downloaded item
+    """
+    payload = json.loads(content)
+    associations = payload.get('associations', {})
+    assert associations.get('editor_1')
+    for dropped in ('editor_0', 'editor_2'):
+        assert not associations.get(dropped)
+    assert payload['headline'] == 'Amazon Is Opening More Bookstores'
+    html = payload['body_html']
+    assert 'video' in html
+    for markup in ('img', 'blockquote', 'audio'):
+        assert markup not in html
+def newsmlg2_content_test(content):
+    """Check that the root element's tag of the parsed XML contains ``newsMessage``."""
+    parsed = lxml.etree.fromstring(content)
+    root_tag = parsed.tag
+    assert 'newsMessage' in root_tag
+def filename(name, item):
+    """Return ``name`` prefixed with the item's ``versioncreated`` time as ``YYYYmmddHHMM-``."""
+    stamp = item["versioncreated"].strftime("%Y%m%d%H%M")
+    return '{}-{}'.format(stamp, name)
+wire_formats = [
+ {
+ 'format': 'text',
+ 'mimetype': 'text/plain',
+ 'filename': filename('amazon-bookstore-opening.txt', item),
+ 'test_content': text_content_test,
+ },
+ {
+ 'format': 'nitf',
+ 'mimetype': 'application/xml',
+ 'filename': filename('amazon-bookstore-opening.xml', item),
+ 'test_content': nitf_content_test,
+ },
+ {
+ 'format': 'newsmlg2',
+ 'mimetype': 'application/vnd.iptc.g2.newsitem+xml',
+ 'filename': filename('amazon-bookstore-opening.xml', item),
+ 'test_content': newsmlg2_content_test,
+ },
+ {
+ 'format': 'picture',
+ 'mimetype': 'image/jpeg',
+ 'filename': 'baseimage.jpg',
+ },
+def setup_block_embeds(client, app):
+ media_id = bson.ObjectId()
+ associations = {
+ 'featuremedia': {
+ 'mimetype': 'image/jpeg',
+ 'type': 'picture',
+ 'renditions': {
+ 'baseImage': {
+ 'mimetype': 'image/jpeg',
+ 'media': str(media_id),
+ 'href': 'http://a.b.c/xxx.jpg',
+ },
+ '16-9': {
+ 'mimetype': 'image/jpeg',
+ 'href': 'http://a.b.c/xxx.jpg',
+ 'media': str(media_id),
+ 'width': 1280,
+ 'height': 720,
+ },
+ '4-3': {
+ "href": "/assets/633d11b9fb5122dcf06a6f02",
+ "width": 800,
+ "height": 600,
+ 'media': str(media_id),
+ "mimetype": "image/jpeg",
+ },
+ },
+ },
+ "editor_1": {
+ "type": "video",
+ "renditions": {
+ "original": {
+ "mimetype": "video/mp4",
+ "href": "/assets/640ff0bdfb5122dcf06a6fc3",
+ 'media': str(media_id),
+ },
+ },
+ "mimetype": "video/mp4",
+ "products": [
+ {"code": "123", "name": "Product A"},
+ {"code": "321", "name": "Product B"},
+ ],
+ },
+ "editor_0": {
+ "type": "audio",
+ "renditions": {
+ "original": {
+ "mimetype": "audio/mp3",
+ "href": "/assets/640feb9bfb5122dcf06a6f7c",
+ "media": "640feb9bfb5122dcf06a6f7c",
+ },
+ },
+ "mimetype": "audio/mp3",
+ "products": [{"code": "999", "name": "NSW News"}],
+ },
+ "editor_2": {
+ "type": "picture",
+ "renditions": {
+ "4-3": {
+ "href": "/assets/633d11b9fb5122dcf06a6f02",
+ "width": 800,
+ "height": 600,
+ "mimetype": "image/jpeg",
+ "media": "633d11b9fb5122dcf06a6f02",
+ },
+ "16-9": {
+ "href": "/assets/633d0f59fb5122dcf06a6ee8",
+ "width": 1280,
+ "height": 720,
+ "mimetype": "image/jpeg",
+ "media": "633d0f59fb5122dcf06a6ee8",
+ "poi": {},
+ },
+ },
+ "products": [{"code": "888"}],
+ },
+ "editor_3": None,
+ }
+ upload_binary('picture.jpg', client, media_id=str(media_id))
+ app.data.update('items', item['_id'], {
+ 'associations': associations,
+ 'body_html': (
+ '
Block 1
+ ''
+ ''
+ ''
+ '
+ 'Block 2
+ ''
+ ''
+ ''
+ '
Block 3'
+ ''
+ ''
+ ''
+ 'Block 4
+ ''
+ ''
+ ''
+ '
+ )
+ }, item)
+def test_download_single_block(client, app):
+    """Download the single fixture item in every wire format.
+
+    For each entry of ``wire_formats`` the response must be a 200 with the
+    entry's mimetype and an attachment filename equal to the entry's
+    ``filename`` (quoted or unquoted).
+    """
+    setup_block_embeds(client, app)
+    for spec in wire_formats:
+        url = f'/download/{item["_id"]}?format={spec["format"]}'
+        resp = client.get(url, follow_redirects=True)
+        assert resp.status_code == 200
+        assert resp.mimetype == spec['mimetype']
+        disposition = resp.headers.get('Content-Disposition')
+        name = spec["filename"]
+        accepted = [f'attachment; filename={name}', f'attachment; filename="{name}"']
+        assert disposition in accepted
+def setup_data(client, app):
+    """Prepare embeds, company ``'3'``, the admin's company link and a wire product.
+
+    Enables ``EMBED_PRODUCT_FILTERING`` and stores a company whose ``embedded``
+    settings disable video display and audio download.
+    """
+    setup_block_embeds(client, app)
+    app.config['EMBED_PRODUCT_FILTERING'] = True
+
+    company_id = '3'
+    embed_settings = {
+        "social_media_display": True,
+        "sdpermit_display": True,
+        "video_display": False,
+        "audio_display": True,
+        "images_display": True,
+        "all_display": False,
+        "social_media_download": True,
+        "video_download": True,
+        "audio_download": False,
+        "images_download": True,
+        "all_download": False,
+        "sdpermit_download": True
+    }
+    company = {
+        '_id': company_id,
+        'name': 'Block Conent.',
+        'is_enabled': True,
+        'embedded': embed_settings
+    }
+    app.data.insert('companies', [company])
+
+    admin = app.data.find_one('users', req=None, first_name='admin')
+    assert admin
+    app.data.update('users', admin['_id'], {'company': company_id}, admin)
+
+    product = {
+        '_id': 13,
+        'name': 'product test',
+        'sd_product_id': '123',
+        'companies': [company_id],
+        'is_enabled': True,
+        'product_type': 'wire'
+    }
+    app.data.insert('products', [product])
+def start_test_block_download(client, app, setup_data):
+    """Download a zip per wire format and validate the expected member of each archive.
+
+    ``app`` and ``setup_data`` are not referenced in the body.
+    Entries of ``wire_formats`` without a ``test_content`` callable only get
+    the membership check.
+    """
+    for spec in wire_formats:
+        archive = download_zip_file(client, spec['format'], 'wire')
+        member = spec['filename']
+        with zipfile.ZipFile(archive) as zf:
+            assert member in zf.namelist()
+            content = zf.open(member).read()
+            checker = spec.get('test_content')
+            if checker:
+                checker(content)
+def assert_history(app):
+    """Verify the ``history`` records written by the block-download run.
+
+    Expects one record per (wire format, item id) pair and checks the fields
+    of the first record returned by the query.
+    """
+    history = app.data.find('history', None, None)
+    expected_total = len(wire_formats) * len(items_ids)
+    assert expected_total == history.count()
+
+    first = history[0]
+    assert 'download' == first['action']
+    assert first.get('user')
+    # ``versioncreated`` must be at most two seconds older than the current time.
+    assert first.get('versioncreated') + timedelta(seconds=2) >= utcnow()
+    assert first.get('item') in items_ids
+    assert first.get('version')
+    assert first.get('company') == '3'
+    assert first.get('section') == 'wire'
+def test_block_download_with_config(client, app, setup_data):
+    """Run the per-format zip download checks, then verify the recorded history."""
+    start_test_block_download(client=client, app=app, setup_data=setup_data)
+    assert_history(app=app)
+def test_ninjs_download(client, app):
+    """ninjs zip download for a company with every embed display/download flag enabled."""
+    setup_block_embeds(client, app)
+    app.config['EMBED_PRODUCT_FILTERING'] = True
+
+    company_id = '1'
+    # Every flag is True; key order matches the stored document.
+    embed_settings = dict.fromkeys((
+        "social_media_display",
+        "video_display",
+        "audio_display",
+        "images_display",
+        "all_display",
+        "social_media_download",
+        "video_download",
+        "audio_download",
+        "images_download",
+        "all_download",
+        "sdpermit_display",
+        "sdpermit_download",
+    ), True)
+    app.data.insert('companies', [{
+        '_id': company_id,
+        'name': 'Press co.',
+        'is_enabled': True,
+        'embedded': embed_settings
+    }])
+
+    admin = app.data.find_one('users', req=None, first_name='admin')
+    assert admin
+    app.data.update('users', admin['_id'], {'company': company_id}, admin)
+
+    app.data.insert('products', [{
+        '_id': 10,
+        'name': 'product test',
+        'sd_product_id': '123',
+        'companies': [company_id],
+        'is_enabled': True,
+        'product_type': 'wire'
+    }])
+    app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3')
+
+    archive = download_zip_file(client, 'downloadninjs', 'wire')
+    json_name = filename('amazon-bookstore-opening.json', item)
+    with zipfile.ZipFile(archive) as zf:
+        assert json_name in zf.namelist()
+        ninjs_content_test(zf.open(json_name).read())
+
+    history = app.data.find('history', None, None)
+    assert 4 == history.count()
+    first = history[0]
+    assert 'download' in first['action']
+    assert 'download' in history[1]['action']
+    assert first.get('user')
+    assert first.get('versioncreated') + timedelta(seconds=2) >= utcnow()
+    assert first.get('item') in items_ids
+    assert first.get('version')
+    assert first.get('company') == '1'
+    assert first.get('section') == 'wire'
+def test_ninjs_block_download_default(client, app):
+    """ninjs zip download for a company with all display flags on and all download flags off."""
+    setup_block_embeds(client, app)
+    app.config['EMBED_PRODUCT_FILTERING'] = True
+
+    company_id = '1'
+    flag_names = (
+        "social_media_display",
+        "video_display",
+        "audio_display",
+        "images_display",
+        "all_display",
+        "social_media_download",
+        "video_download",
+        "audio_download",
+        "images_download",
+        "all_download",
+        "sdpermit_display",
+        "sdpermit_download",
+    )
+    # ``*_display`` flags are True, ``*_download`` flags are False.
+    embed_settings = {flag: flag.endswith('_display') for flag in flag_names}
+    app.data.insert('companies', [{
+        '_id': company_id,
+        'name': 'Press co.',
+        'is_enabled': True,
+        'embedded': embed_settings
+    }])
+
+    admin = app.data.find_one('users', req=None, first_name='admin')
+    assert admin
+    app.data.update('users', admin['_id'], {'company': company_id}, admin)
+
+    app.data.insert('products', [{
+        '_id': 10,
+        'name': 'product test',
+        'sd_product_id': '123',
+        'companies': [company_id],
+        'is_enabled': True,
+        'product_type': 'wire'
+    }])
+    app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3')
+
+    archive = download_zip_file(client, 'downloadninjs', 'wire')
+    json_name = filename('amazon-bookstore-opening.json', item)
+    with zipfile.ZipFile(archive) as zf:
+        assert json_name in zf.namelist()
+        ninjs_content_test(zf.open(json_name).read())
+
+    history = app.data.find('history', None, None)
+    assert 4 == history.count()
+    first = history[0]
+    assert 'download' in first['action']
+    assert 'download' in history[1]['action']
+    assert first.get('user')
+    assert first.get('versioncreated') + timedelta(seconds=2) >= utcnow()
+    assert first.get('item') in items_ids
+    assert first.get('version')
+    assert first.get('company') == '1'
+    assert first.get('section') == 'wire'
+def test_ninjs_block_download_example(client, app):
+    """ninjs zip download for a company where ``video_download`` is the only download flag set."""
+    setup_block_embeds(client, app)
+    app.config['EMBED_PRODUCT_FILTERING'] = True
+
+    company_id = '2'
+    company = {
+        '_id': company_id,
+        'name': 'Press01 co.',
+        'is_enabled': True,
+        'embedded': {
+            "social_media_display": True,
+            "video_display": True,
+            "audio_display": True,
+            "images_display": True,
+            "all_display": True,
+            "social_media_download": False,
+            "video_download": True,
+            "audio_download": False,
+            "images_download": False,
+            "all_download": False,
+            "sdpermit_display": True,
+            "sdpermit_download": False
+        }
+    }
+    app.data.insert('companies', [company])
+
+    admin = app.data.find_one('users', req=None, first_name='admin')
+    assert admin
+    app.data.update('users', admin['_id'], {'company': company_id}, admin)
+
+    product = {
+        '_id': 10,
+        'name': 'product test',
+        'sd_product_id': '123',
+        'companies': [company_id],
+        'is_enabled': True,
+        'product_type': 'wire'
+    }
+    app.data.insert('products', [product])
+    app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3')
+
+    archive = download_zip_file(client, 'downloadninjs', 'wire')
+    json_name = filename('amazon-bookstore-opening.json', item)
+    with zipfile.ZipFile(archive) as zf:
+        assert json_name in zf.namelist()
+        ninjs_block_download_example(zf.open(json_name).read())
+
+    history = app.data.find('history', None, None)
+    assert 4 == history.count()
+    first = history[0]
+    assert 'download' in first['action']
+    assert 'download' in history[1]['action']
+    assert first.get('user')
+    assert first.get('versioncreated') + timedelta(seconds=2) >= utcnow()
+    assert first.get('item') in items_ids
+    assert first.get('version')
+    assert first.get('company') == '2'
+    assert first.get('section') == 'wire'
diff --git a/tests/test_block_media.py b/tests/test_block_media.py
new file mode 100644
index 000000000..f98d32fd9
--- /dev/null
+++ b/tests/test_block_media.py
@@ -0,0 +1,589 @@
+import io
+import json
+import zipfile
+from datetime import timedelta
+import re
+import bson
+import lxml.etree
+import pytest
+from superdesk.utc import utcnow
+from .fixtures import items, init_items, init_auth, agenda_items, init_agenda_items # noqa
+from .test_push import upload_binary
+items_ids = [item['_id'] for item in items[:2]]
+item = items[:2][0]
+def setup_block_embeds(client, app):
+ media_id = bson.ObjectId()
+ associations = {
+ 'featuremedia': {
+ 'mimetype': 'image/jpeg',
+ 'type': 'picture',
+ 'renditions': {
+ 'baseImage': {
+ 'mimetype': 'image/jpeg',
+ 'media': str(media_id),
+ 'href': 'http://a.b.c/xxx.jpg',
+ },
+ '16-9': {
+ 'mimetype': 'image/jpeg',
+ 'href': 'http://a.b.c/xxx.jpg',
+ 'media': str(media_id),
+ 'width': 1280,
+ 'height': 720,
+ },
+ '4-3': {
+ "href": "/assets/633d11b9fb5122dcf06a6f02",
+ "width": 800,
+ "height": 600,
+ 'media': str(media_id),
+ "mimetype": "image/jpeg",
+ },
+ },
+ },
+ "editor_1": {
+ "type": "video",
+ "renditions": {
+ "original": {
+ "mimetype": "video/mp4",
+ "href": "/assets/640ff0bdfb5122dcf06a6fc3",
+ 'media': str(media_id),
+ },
+ },
+ "mimetype": "video/mp4",
+ # for base permission check,pass
+ "products": [
+ {"code": "123", "name": "Product A"},
+ {"code": "321", "name": "Product B"},
+ ],
+ },
+ "editor_0": {
+ "type": "audio",
+ "renditions": {
+ "original": {
+ "mimetype": "audio/mp3",
+ "href": "/assets/640feb9bfb5122dcf06a6f7c",
+ "media": "640feb9bfb5122dcf06a6f7c",
+ },
+ },
+ "mimetype": "audio/mp3",
+ # for base permission check ,disable
+ "products": [{"code": "999", "name": "NSW News"}],
+ },
+ "editor_2": {
+ "type": "picture",
+ "renditions": {
+ "4-3": {
+ "href": "/assets/633d11b9fb5122dcf06a6f02",
+ "width": 800,
+ "height": 600,
+ "mimetype": "image/jpeg",
+ "media": "633d11b9fb5122dcf06a6f02",
+ },
+ "16-9": {
+ "href": "/assets/633d0f59fb5122dcf06a6ee8",
+ "width": 1280,
+ "height": 720,
+ "mimetype": "image/jpeg",
+ "media": "633d0f59fb5122dcf06a6ee8",
+ "poi": {},
+ },
+ },
+ # for base permission check, pass
+ "products": [{"code": "123"}],
+ },
+ "editor_3": None,
+ }
+ upload_binary('picture.jpg', client, media_id=str(media_id))
+ app.data.update('items', item['_id'], {
+ 'associations': associations,
+ 'body_html': (
+ 'Block 1
+ ''
+ ''
+ ''
+ '
+ 'Block 2
+ ''
+ ''
+ ''
+ '
Block 3'
+ ''
+ ''
+ ''
+ 'Block 4
+ ''
+ ''
+ ''
+ '
+ )
+ }, item)
+def configure_app(app):
+    """Turn on ``EMBED_PRODUCT_FILTERING`` in ``app.config`` and return the same ``app``."""
+    setting = 'EMBED_PRODUCT_FILTERING'
+    app.config[setting] = True
+    return app
+def setup_company_data(app, company_id, company_name, embedded):
+    """Insert a company, attach the admin user to it and add a matching wire product.
+
+    :param app: application object exposing ``app.data``
+    :param company_id: company ``_id``; must be convertible with ``int()``
+    :param company_name: company display name
+    :param embedded: value stored under the company's ``embedded`` key
+    """
+    company = {
+        '_id': company_id,
+        'name': company_name,
+        'is_enabled': True,
+        'embedded': embedded
+    }
+    app.data.insert('companies', [company])
+
+    admin = app.data.find_one('users', req=None, first_name='admin')
+    assert admin
+    app.data.update('users', admin['_id'], {'company': company_id}, admin)
+
+    product = {
+        '_id': int(company_id) * 10,
+        'name': 'product test',
+        # base product check
+        'sd_product_id': '123',
+        'companies': [company_id],
+        'is_enabled': True,
+        'product_type': 'wire'
+    }
+    app.data.insert('products', [product])
+ ('3', 'Block Conent.', {
+ "social_media_display": True, "sdpermit_display": True, "video_display": False,
+ "audio_display": True, "images_display": True, "all_display": False,
+ "social_media_download": True, "video_download": True, "audio_download": False,
+ "images_download": True, "all_download": False, "sdpermit_download": True
+ }),
+ ('2', 'Press01 co.', {
+ "social_media_display": True, "video_display": True, "audio_display": True,
+ "images_display": True, "all_display": True, "social_media_download": False,
+ "video_download": True, "audio_download": False, "images_download": False,
+ "all_download": False, "sdpermit_display": True, "sdpermit_download": False
+ }),
+ ('1', 'Press co.', {
+ "social_media_display": True, "video_display": True, "audio_display": True,
+ "images_display": True, "all_display": True, "social_media_download": False,
+ "video_download": False, "audio_download": False, "images_download": False,
+ "all_download": False, "sdpermit_display": True, "sdpermit_download": False
+ })
+def company_data(request):
+    """Return the ``param`` attribute of ``request`` unchanged."""
+    current = request.param
+    return current
+def setup_data(client, app, configure_app, setup_block_embeds, company_data):
+    """Create the company, admin link and wire product for one company tuple.
+
+    ``client``, ``configure_app`` and ``setup_block_embeds`` are not referenced
+    in the body; presumably they are requested as pytest fixtures for their
+    side effects (TODO confirm - the decorators are not visible here).
+
+    :param app: application object exposing ``app.data``
+    :param company_data: ``(company_id, company_name, embedded)`` tuple
+    :return: ``(app, company_id)``
+    """
+    company_id, company_name, embedded = company_data
+    app.data.insert('companies', [{
+        '_id': company_id,
+        'name': company_name,
+        'is_enabled': True,
+        'embedded': embedded
+    }])
+    user = app.data.find_one('users', req=None, first_name='admin')
+    assert user
+    app.data.update('users', user['_id'], {'company': company_id}, user)
+    app.data.insert('products', [{
+        # Convert before multiplying: for a string id, ``company_id * 10`` is
+        # string repetition ('3' -> '3333333333'), not the intended 30.
+        '_id': int(company_id) * 10,
+        'name': 'product test',
+        'sd_product_id': '123',
+        'companies': [company_id],
+        'is_enabled': True,
+        'product_type': 'wire'
+    }])
+    return app, company_id
+def download_zip_file(client, _format, section):
+    """Download the first two fixture items as a zip and return it as a byte stream.
+
+    Asserts a 200 ``application/zip`` response whose ``Content-Disposition``
+    names ``<YYYYmmddHHMM>-newsroom.zip``.
+
+    :param client: test client used to issue the GET request
+    :param _format: value of the ``format`` query argument
+    :param section: value of the ``type`` query argument
+    :return: ``io.BytesIO`` wrapping the response body
+    """
+    resp = client.get('/download/{0}?format={1}&type={2}'.format(','.join(items_ids), _format, section),
+                      follow_redirects=True)
+    assert resp.status_code == 200
+    assert resp.mimetype == 'application/zip'
+    # The filename carries a minute-resolution timestamp. Accept the previous
+    # minute as well, so the check does not fail when the request and this
+    # assertion fall on opposite sides of a minute boundary.
+    now = utcnow()
+    accepted = [
+        'attachment; filename={0}-newsroom.zip'.format(moment.strftime("%Y%m%d%H%M"))
+        for moment in (now, now - timedelta(minutes=1))
+    ]
+    assert resp.headers.get('Content-Disposition') in accepted
+    return io.BytesIO(resp.get_data())
+def text_content_test(content):
+    """Check the plain-text rendition of the downloaded item.
+
+    :param content: raw bytes of the text file; decoded as UTF-8 and split on newlines
+    """
+    decoded = content.decode('utf-8')
+    lines = decoded.split('\n')
+    assert 'AMAZON-BOOKSTORE-OPENING' in lines[0]
+    assert 'Amazon Is Opening More Bookstores' in lines[1]
+    # No line of the output may be empty.
+    assert '' not in lines
+def nitf_content_test(content):
+    """Check that ``content`` parses as an NITF document titled like the first fixture item."""
+    tree_root = lxml.etree.fromstring(content)
+    assert tree_root.tag == 'nitf'
+    head_node = tree_root.find('head')
+    expected_title = items[0]['headline']
+    assert expected_title == head_node.find('title').text
+def ninjs_content_test(content):
+    """Check a ninjs download in which only the ``editor_1`` embed is retained.
+
+    :param content: JSON document (str or bytes) of the downloaded item
+    """
+    doc = json.loads(content)
+    embeds = doc.get('associations', {})
+    assert embeds.get('editor_1')
+    for removed in ('editor_0', 'editor_2'):
+        assert not embeds.get(removed)
+    assert doc['headline'] == 'Amazon Is Opening More Bookstores'
+    html = doc['body_html']
+    assert 'editor_1' in html
+    assert 'editor_0' not in html
+def ninjs_block_download_video(content):
+    """Check a ninjs download where only the video embed (``editor_1``) is kept.
+
+    :param content: JSON document (str or bytes) of the downloaded item
+    """
+    doc = json.loads(content)
+    embeds = doc.get('associations', {})
+    assert embeds.get('editor_1')
+    assert not embeds.get('editor_0')
+    assert not embeds.get('editor_2')
+    assert doc['headline'] == 'Amazon Is Opening More Bookstores'
+    html = doc['body_html']
+    assert 'video' in html
+    for markup in ('img', 'blockquote', 'audio'):
+        assert markup not in html
+def ninjs_block_download_audio_image(content):
+    """Check a ninjs download where only the picture embed (``editor_2``) is kept.
+
+    :param content: JSON document (str or bytes) of the downloaded item
+    """
+    doc = json.loads(content)
+    embeds = doc.get('associations', {})
+    for removed in ('editor_1', 'editor_0'):
+        assert not embeds.get(removed)
+    assert embeds.get('editor_2')
+    assert doc['headline'] == 'Amazon Is Opening More Bookstores'
+    html = doc['body_html']
+    assert 'video' not in html
+    assert 'img' in html
+    assert 'blockquote' not in html
+    assert 'audio' not in html
+def htmlpackage_block_download_video(content):
+    """Check a JSON payload where only the video embed (``editor_1``) is kept.
+
+    :param content: JSON document (str or bytes); parsed with ``json.loads``
+    """
+    parsed = json.loads(content)
+    linked = parsed.get('associations', {})
+    assert linked.get('editor_1')
+    for absent in ('editor_0', 'editor_2'):
+        assert not linked.get(absent)
+    assert parsed['headline'] == 'Amazon Is Opening More Bookstores'
+    markup = parsed['body_html']
+    assert 'video' in markup
+    assert 'img' not in markup
+    assert 'blockquote' not in markup
+    assert 'audio' not in markup
+def htmlpackage_block_download_audio_image(html_content_file):
+ html_content = html_content_file.decode('utf-8')
+ assert re.search(r'
Amazon Is Opening More Bookstores
', html_content)
+ assert not re.search(r'