diff --git a/.eslintrc.js b/.eslintrc.js index 8a1a8b3c8..6b87c2a40 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -10,45 +10,39 @@ module.exports = { "extends": ["eslint:recommended", "plugin:react/recommended"], "parserOptions": { "ecmaFeatures": { - "experimentalObjectRestSpread": true, "jsx": true }, + "ecmaVersion": 2018, "sourceType": "module" }, "plugins": [ "react" ], "rules": { - "indent": [ - "error", - 4 - ], - "linebreak-style": [ - "error", - "unix" - ], - "quotes": [ - "error", - "single" - ], - "semi": [ - "error", - "always" - ], - "no-console": [ - "error", - {"allow": ["warn", "error"]} - ], - "react/no-deprecated": [ - 1, - ], - "react/jsx-no-target-blank": [ - 0, - ], + "indent": ["error", 4], + "linebreak-style": ["error", "unix"], + "quotes": ["error", "single"], + "semi": ["error", "always"], + "no-console": ["error", {"allow": ["warn", "error"]}], + "react/no-deprecated": [1], + "react/jsx-no-target-blank": [0] }, "settings": { "react": { "version": "16.2" } - } + }, + "overrides": [ + { + "files": ["webpack.config.js", "*.config.js"], + "env": { + "node": true, + "commonjs": true + }, + "globals": { + "__dirname": "readonly", + "process": "readonly" + } + } + ] }; diff --git a/.github/workflows/tests-py38.yml b/.github/workflows/tests-py38.yml index afa638535..95c5aadc0 100644 --- a/.github/workflows/tests-py38.yml +++ b/.github/workflows/tests-py38.yml @@ -17,7 +17,11 @@ jobs: python-version: 3.8 - name: docker-compose - run: docker-compose -f .actions-docker-compose.yml up -d + run: docker compose -f .actions-docker-compose.yml up -d + + - run: | + sudo apt-get update + sudo apt-get install pkg-config libxml2-dev libxmlsec1-dev libxmlsec1-openssl - name: cache pip uses: actions/cache@v2 @@ -30,7 +34,7 @@ jobs: - name: pip install run: | - python -m pip install --upgrade pip wheel setuptools + python -m pip install --upgrade pip==24.0 wheel setuptools pip install -r dev-requirements.txt - name: flake8 diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2427ff764..c11e638e9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ jobs: python-version: 3.6 - name: docker-compose - run: docker-compose -f .actions-docker-compose.yml up -d + run: docker compose -f .actions-docker-compose.yml up -d - run: | sudo apt-get update diff --git a/assets/companies/actions.js b/assets/companies/actions.js index a568e31c1..157e3a9c0 100644 --- a/assets/companies/actions.js +++ b/assets/companies/actions.js @@ -116,7 +116,17 @@ export function postCompany() { }; } - +export function savePermissions(company, permissions) { + return function (dispatch) { + return server.postWithCsrfToken(`/companies/${company._id}/permissions`, permissions) + .then(() => { + notify.success(gettext('Company updated successfully')); + dispatch(fetchProducts()); + dispatch(fetchCompanies()); + }) + .catch((error) => errorHandler(error, dispatch, setError)); + }; +} /** * Fetches products @@ -137,19 +147,6 @@ export function fetchProducts() { * Save permissions for a company * */ -export function savePermissions(company, permissions) { - return function (dispatch) { - return server.post(`/companies/${company._id}/permissions`, permissions) - .then(() => { - notify.success(gettext('Company updated successfully')); - dispatch(fetchProducts()); - dispatch(fetchCompanies()); - }) - .catch((error) => errorHandler(error, dispatch, setError)); - }; -} - - /** * Deletes a company * diff --git a/assets/companies/components/CompanyPermissions.jsx b/assets/companies/components/CompanyPermissions.jsx index 0b6b16622..cac52cd31 100644 --- a/assets/companies/components/CompanyPermissions.jsx +++ b/assets/companies/components/CompanyPermissions.jsx @@ -1,41 +1,50 @@ -import React from 'react'; +import React, {Component} from 'react'; import PropTypes from 'prop-types'; -import { connect } from 'react-redux'; -import { gettext } from 'utils'; -import { get } from 
'lodash'; +import {connect} from 'react-redux'; +import {gettext} from 'utils'; +import {get} from 'lodash'; import CheckboxInput from 'components/CheckboxInput'; import {savePermissions} from '../actions'; -class CompanyPermissions extends React.Component { - +class CompanyPermissions extends Component { constructor(props) { super(props); this.state = this.setup(); + this.handleSubmit = this.handleSubmit.bind(this); + this.handleChange = this.handleChange.bind(this); + this.togglePermission = this.togglePermission.bind(this); } setup() { - const products = {}; - - this.props.products.forEach((product) => { - products[product._id] = get(product, 'companies', []).includes(this.props.company._id); - }); - - const sections = {}; - - if (this.props.company.sections) { - Object.assign(sections, this.props.company.sections); - } else { - this.props.sections.forEach((section) => { - sections[section._id] = true; - }); - } + const {company, sections, products} = this.props; - const archive_access = !!this.props.company.archive_access; - const events_only = !!this.props.company.events_only; + const permissions = { + sections: company.sections || sections.reduce((acc, section) => ({...acc, [section._id]: true}), {}), + products: products.reduce((acc, product) => ({ + ...acc, + [product._id]: get(product, 'companies', []).includes(company._id) + }), {}), + archive_access: company.archive_access || false, + events_only: company.events_only || false, + embedded: { + social_media_display: get(company, 'embedded.social_media_display', false), + video_display: get(company, 'embedded.video_display', false), + audio_display: get(company, 'embedded.audio_display', false), + images_display: get(company, 'embedded.images_display', false), + all_display: get(company, 'embedded.all_display', false), + social_media_download: get(company, 'embedded.social_media_download', false), + video_download: get(company, 'embedded.video_download', false), + audio_download: get(company, 
'embedded.audio_download', false), + images_download: get(company, 'embedded.images_download', false), + all_download: get(company, 'embedded.all_download', false), + sdpermit_display: get(company, 'embedded.sdpermit_display', false), + sdpermit_download: get(company, 'embedded.sdpermit_download', false), + }, + }; - return {sections, products, archive_access, events_only}; + return permissions; } componentDidUpdate(prevProps) { @@ -44,20 +53,57 @@ class CompanyPermissions extends React.Component { } } - toggle(key, _id) { - const field = this.state[key]; - field[_id] = !field[_id]; - this.setState({[key]: field}); + handleSubmit(event) { + event.preventDefault(); + this.props.savePermissions(this.props.company, this.state); + } + + handleChange(key, value) { + this.setState((prevState) => { + if (key.startsWith('embedded.')) { + const [, embeddedKey] = key.split('.'); + return { + ...prevState, + embedded: { + ...prevState.embedded, + [embeddedKey]: value, + }, + }; + } else { + return { + ...prevState, + [key]: value, + }; + } + }); + } + + togglePermission(key, _id, value) { + this.setState((prevState) => ({ + ...prevState, + [key]: { + ...prevState[key], + [_id]: value, + }, + })); } render() { + const {sections, products} = this.props; + const { + archive_access, + events_only, + embedded = {}, + } = this.state; + + const optionLabels = { + Display: 'Allow Visualization', + Download: 'Allow Download' + }; return ( -
-
{ - event.preventDefault(); - this.props.savePermissions(this.props.company, this.state); - }}> -
+
+ +
    @@ -65,59 +111,105 @@ class CompanyPermissions extends React.Component { this.setState({archive_access: !this.state.archive_access})} + value={archive_access} + onChange={() => this.handleChange('archive_access', !archive_access)} />
  • this.setState({events_only: !this.state.events_only})} + value={events_only} + onChange={() => this.handleChange('events_only', !events_only)} />
-
+
+ +

+ Default: All Content Types if none selected. Also SDpermit Media option + can start to use it if required. +

+
+ {['Display', 'Download'].map((option) => ( +
+
+ +
    + {[ + {label: 'Images', key: 'images'}, + {label: 'Audios', key: 'audio'}, + {label: 'Videos', key: 'video'}, + {label: 'Social Media', key: 'social_media'}, + {label: 'SDpermit Media', key: 'sdpermit'}, + {label: 'All Above', key: 'all'}, + ].map(({label, key}) => ( +
  • + + this.handleChange( + `embedded.${key}_${option.toLowerCase()}`, + !embedded[`${key}_${option.toLowerCase()}`] + ) + } + /> +
  • + ))} +
+
+
+ ))} +
+
+ +
    - {this.props['sections'].map((item) => ( -
  • + {sections.map((section) => ( +
  • this.toggle('sections', item._id)} /> + name={section._id} + label={section.name} + value={this.state.sections[section._id] || false} + onChange={(value) => this.togglePermission('sections', section._id, value)} + />
  • ))}
-
- {this.props['sections'].map((section) => ( - [, -
    - {this.props['products'].filter((p) => (p.product_type || 'wire').toLowerCase() === section._id.toLowerCase()) +
    + {sections.map((section) => ( + + +
      + {products + .filter((p) => (p.product_type || 'wire').toLowerCase() === section._id.toLowerCase()) .map((product) => (
    • this.toggle('products', product._id)} /> + value={this.state.products[product._id] || false} + onChange={(value) => this.togglePermission('products', product._id, value)} + />
    • ))} -
    ] +
+ ))}
-
-
+ +
@@ -133,14 +225,26 @@ CompanyPermissions.propTypes = { sections: PropTypes.object, archive_access: PropTypes.bool, events_only: PropTypes.bool, + embedded: PropTypes.shape({ + social_media_display: PropTypes.bool, + video_display: PropTypes.bool, + audio_display: PropTypes.bool, + images_display: PropTypes.bool, + all_display: PropTypes.bool, + sdpermit_display: PropTypes.bool, + social_media_download: PropTypes.bool, + video_download: PropTypes.bool, + audio_download: PropTypes.bool, + images_download: PropTypes.bool, + sdpermit_download: PropTypes.bool, + all_download: PropTypes.bool, + }), }).isRequired, - sections: PropTypes.arrayOf(PropTypes.shape({ _id: PropTypes.string.isRequired, name: PropTypes.string.isRequired, })), products: PropTypes.arrayOf(PropTypes.object).isRequired, - savePermissions: PropTypes.func.isRequired, }; diff --git a/assets/components/cards/render/CardMeta.jsx b/assets/components/cards/render/CardMeta.jsx index 77f674a49..d7a7588d0 100644 --- a/assets/components/cards/render/CardMeta.jsx +++ b/assets/components/cards/render/CardMeta.jsx @@ -1,9 +1,9 @@ import React from 'react'; import PropTypes from 'prop-types'; import {isEmpty} from 'lodash'; -import { gettext, shortDate, fullDate } from 'utils'; +import {gettext, shortDate, fullDate} from 'utils'; -function CardMeta({wordCount, pictureAvailable, source, versioncreated, displayDivider, slugline ,audio, video}) { +function CardMeta({wordCount, pictureAvailable, source, versioncreated, displayDivider, slugline, audio, video}) { return (
@@ -15,12 +15,13 @@ function CardMeta({wordCount, pictureAvailable, source, versioncreated, displayD } {!isEmpty(audio) && - + + } {!isEmpty(video) && - + } {displayDivider && } diff --git a/assets/images.js b/assets/images.js new file mode 100644 index 000000000..327d09176 --- /dev/null +++ b/assets/images.js @@ -0,0 +1 @@ +import './images/poster_default.jpg'; diff --git a/assets/images/poster_default.jpg b/assets/images/poster_default.jpg new file mode 100644 index 000000000..c4e4e7bdd Binary files /dev/null and b/assets/images/poster_default.jpg differ diff --git a/assets/server.js b/assets/server.js index 68b6dab53..f5d690710 100644 --- a/assets/server.js +++ b/assets/server.js @@ -29,6 +29,12 @@ class Server { * @param {String} url * @return {Promise} */ + + getCsrfToken() { + return this.get('/companies/get-csrf-token') + .then(response => response.csrf_token); + } + get(url) { return fetch(url, options({})) .then(checkStatus); @@ -63,6 +69,19 @@ class Server { })).then(checkStatus); } + postWithCsrfToken(url, data) { + return this.getCsrfToken().then(csrfToken => { + return fetch(url, options({ + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRF-Token': csrfToken + }, + body: JSON.stringify(data), + })).then(checkStatus); + }); + } + /** * Make POST request to url in keeps the format of the input * diff --git a/assets/ui/components/ArticleBodyHtml.jsx b/assets/ui/components/ArticleBodyHtml.jsx index 761801a9d..448fe0b56 100644 --- a/assets/ui/components/ArticleBodyHtml.jsx +++ b/assets/ui/components/ArticleBodyHtml.jsx @@ -4,62 +4,83 @@ import {get, memoize} from 'lodash'; import {formatHTML} from 'utils'; import {connect} from 'react-redux'; import {selectCopy} from '../../wire/actions'; +import DOMPurify from 'dompurify'; +const fallbackDefault = '/static/poster_default.jpg'; -/** - * using component to fix iframely loading - * https://iframely.com/docs/reactjs - */ class ArticleBodyHtml extends React.PureComponent { 
constructor(props) { super(props); + this.state = { + sanitizedHtml: '', + }; this.copyClicked = this.copyClicked.bind(this); this.clickClicked = this.clickClicked.bind(this); - - // use memoize so this function is only called when `body_html` changes + this.preventContextMenu = this.preventContextMenu.bind(this); this.getBodyHTML = memoize(this._getBodyHTML.bind(this)); - this.bodyRef = React.createRef(); + this.players = new Map(); } componentDidMount() { + this.updateSanitizedHtml(); this.loadIframely(); + this.setupPlyrPlayers(); this.executeScripts(); document.addEventListener('copy', this.copyClicked); document.addEventListener('click', this.clickClicked); + this.addContextMenuEventListeners(); } - clickClicked(event) { - if (event != null) { - const target = event.target; - - if (target && target.tagName === 'A' && this.isLinkExternal(target.href)) { - event.preventDefault(); - event.stopPropagation(); - - // security https://mathiasbynens.github.io/rel-noopener/ - var nextWindow = window.open(); - - nextWindow.opener = null; - nextWindow.location.href = target.href; - } + componentDidUpdate(prevProps) { + if (prevProps.item !== this.props.item) { + this.updateSanitizedHtml(); } + this.loadIframely(); + this.executeScripts(); + this.setupPlyrPlayers(); + this.addContextMenuEventListeners(); } - isLinkExternal(href) { - try { - const url = new URL(href); + componentWillUnmount() { + document.removeEventListener('copy', this.copyClicked); + document.removeEventListener('click', this.clickClicked); + this.removeContextMenuEventListeners(); - // Check if the hosts are different and protocol is http or https - return url.host !== window.location.host && ['http:', 'https:'].includes(url.protocol); - } catch (e) { - // will throw if string is not a valid link - return false; - } + this.players.forEach(player => player.destroy()); + this.players.clear(); } - componentDidUpdate() { - this.loadIframely(); - this.executeScripts(); + updateSanitizedHtml() { + const item 
= this.props.item; + const html = this.getBodyHTML( + get(item, 'es_highlight.body_html.length', 0) > 0 ? + item.es_highlight.body_html[0] : + item.body_html + ); + this.sanitizeHtml(html); + } + + sanitizeHtml(html) { + if (!html) { + this.setState({sanitizedHtml: ''}); + return; + } + const sanitizedHtml = DOMPurify.sanitize(html, { + ADD_TAGS: ['iframe', 'video', 'audio', 'figure', 'figcaption', 'script', 'twitter-widget', 'fb:like', + 'blockquote', 'div'], + ADD_ATTR: [ + 'allow', 'allowfullscreen', 'frameborder', 'scrolling', 'src', 'width', 'height', + 'data-plyr-config', 'data-plyr', 'aria-label', 'aria-hidden', 'focusable', + 'class', 'role', 'tabindex', 'controls', 'download', 'target', + 'async', 'defer', 'data-tweet-id', 'data-href', + 'data-instgrm-captioned', 'data-instgrm-permalink', + 'data-flourish-embed', 'data-src' + ], + ALLOW_DATA_ATTR: true, + ALLOW_UNKNOWN_PROTOCOLS: true, + KEEP_CONTENT: true + }); + this.setState({sanitizedHtml}); } loadIframely() { @@ -70,6 +91,7 @@ class ArticleBodyHtml extends React.PureComponent { } } + executeScripts() { const tree = this.bodyRef.current; const loaded = []; @@ -78,10 +100,6 @@ class ArticleBodyHtml extends React.PureComponent { return; } - if (window.Plyr != null) { - window.Plyr.setup('.js-player'); - } - tree.querySelectorAll('script').forEach((s) => { if (s.hasAttribute('src') && !loaded.includes(s.getAttribute('src'))) { let url = s.getAttribute('src'); @@ -117,8 +135,9 @@ class ArticleBodyHtml extends React.PureComponent { document.body.removeChild(script); }; - script.onerrror = (error) => { - throw new URIError('The script ' + error.target.src + 'didn\'t load.'); + script.onerror = (error) => { + console.error('Script load error:', error); + throw new URIError('The script ' + error.target.src + ' didn\'t load.'); }; document.body.appendChild(script); @@ -126,13 +145,130 @@ class ArticleBodyHtml extends React.PureComponent { }); } - copyClicked() { - this.props.reportCopy(this.props.item); + 
setupPlyrPlayers() { + const tree = this.bodyRef.current; + if (tree == null || window.Plyr == null) { + return; + } + + tree.querySelectorAll('.js-player:not(.plyr--setup)').forEach(element => { + if (!this.players.has(element)) { + const player = new window.Plyr(element, { + seekTime: 1, + keyboard: {focused: true, global: true}, + tooltips: {controls: true, seek: true}, + captions: {active: true, language: 'auto', update: true} + }); + this.players.set(element, player); + this.checkVideoLoading(player, element.getAttribute('src')); + this.setupMovePlayback(player); + } + }); } - componentWillUnmount() { - document.removeEventListener('copy', this.copyClicked); - document.removeEventListener('click', this.clickClicked); + setupMovePlayback(player) { + const container = player.elements.container; + let isScrubbing = false; + let wasPaused = false; + + container.addEventListener('mousedown', startScrubbing); + document.addEventListener('mousemove', scrub); + document.addEventListener('mouseup', stopScrubbing); + + function startScrubbing(event) { + if (event.target.closest('.plyr__progress')) { + isScrubbing = true; + wasPaused = player.paused; + player.pause(); + scrub(event); + } + } + + function scrub(event) { + if (!isScrubbing) return; + + const progress = player.elements.progress; + const rect = progress.getBoundingClientRect(); + const percent = Math.min(Math.max((event.clientX - rect.left) / rect.width, 0), 1); + player.currentTime = percent * player.duration; + } + + function stopScrubbing() { + if (isScrubbing) { + isScrubbing = false; + if (!wasPaused) { + player.play(); + } + } + } + + } + + checkVideoLoading(player, videoSrc) { + if (!videoSrc || !videoSrc.startsWith('/assets/')) { + return; + } + const loadHandler = () => { + const checkVideoContent = () => { + if (player.media.videoWidth > 0 && player.media.videoHeight > 0) { + const canvas = document.createElement('canvas'); + canvas.width = player.media.videoWidth; + canvas.height = 
player.media.videoHeight; + const ctx = canvas.getContext('2d'); + + ctx.drawImage(player.media, 0, 0, canvas.width, canvas.height); + const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); + const data = imageData.data; + // loop for none blank pixel + let stepSize = 10; // Adjust the step size + for (let i = 0; i < data.length; i += stepSize * 4) { + if (data[i] > 0 || data[i + 1] > 0 || data[i + 2] > 0) { + console.warn('Pixel content detected, poster not needed'); + return true; + } + } + } + return false; + }; + + const attemptContentCheck = () => { + if (checkVideoContent()) { + player.poster = null; + console.warn('Pixel content detected, poster removed'); + return true; + } + return false; + }; + + let attemptCount = 0; + const maxAttempts = 1; + const checkInterval = setInterval(() => { + if (attemptContentCheck() || attemptCount >= maxAttempts) { + clearInterval(checkInterval); + player.off('loadeddata', loadHandler); + + if (attemptCount >= maxAttempts) { + console.warn('Setting fallback poster'); + player.poster = fallbackDefault; + } + } + attemptCount++; + }, 500); + }; + + player.on('error', (error) => { + console.error('Error details and location:', { + message: error.message, + code: error.code, + type: error.type, + target: error.target, + currentTarget: error.currentTarget, + originalTarget: error.originalTarget, + error: error.error + }); + player.poster = fallbackDefault; + }); + player.on('loadeddata', loadHandler); } _getBodyHTML(bodyHtml) { @@ -141,17 +277,9 @@ class ArticleBodyHtml extends React.PureComponent { this._updateImageEmbedSources(formatHTML(bodyHtml)); } - /** - * Update Image Embeds to use the Web APIs Assets endpoint - * - * @param html - The `body_html` value (could also be the ES Highlight version) - * @returns {string} - * @private - */ _updateImageEmbedSources(html) { const item = this.props.item; - // Get the list of Original Rendition IDs for all Image Associations const imageEmbedOriginalIds = Object 
.keys(item.associations || {}) .filter((key) => key.startsWith('editor_')) @@ -159,35 +287,27 @@ class ArticleBodyHtml extends React.PureComponent { .filter((value) => value); if (!imageEmbedOriginalIds.length) { - // This item has no Image Embeds - // return the supplied html as-is return html; } - // Create a DOM node tree from the supplied html - // We can then efficiently find and update the image sources const container = document.createElement('div'); let imageSourcesUpdated = false; container.innerHTML = html; container - .querySelectorAll('img') + .querySelectorAll('img,video,audio') .forEach((imageTag) => { - // Using the tag's `src` attribute, find the Original Rendition's ID const originalMediaId = imageEmbedOriginalIds.find((mediaId) => ( !imageTag.src.startsWith('/assets/') && imageTag.src.includes(mediaId)) ); if (originalMediaId) { - // We now have the Original Rendition's ID - // Use that to update the `src` attribute to use Newshub's Web API imageSourcesUpdated = true; imageTag.src = `/assets/${originalMediaId}`; } }); - // Find all Audio and Video tags and mark them up for the player container.querySelectorAll('video, audio') .forEach((vTag) => { vTag.classList.add('js-player'); @@ -195,7 +315,6 @@ class ArticleBodyHtml extends React.PureComponent { vTag.setAttribute('data-plyr-config', '{"controls": ["play-large", "play",' + '"progress", "volume", "mute", "rewind", "fast-forward", "current-time",' + '"captions", "restart", "duration"]}'); - } else { vTag.setAttribute('data-plyr-config', '{"controls": ["play-large", "play",' + '"progress", "volume", "mute", "rewind", "fast-forward", "current-time",' + @@ -203,35 +322,77 @@ class ArticleBodyHtml extends React.PureComponent { '"' + vTag.getAttribute('src') + '?item_id=' + item._id + '"' + '}}'); } - imageSourcesUpdated = true; }); - - // If Image tags were not updated, then return the supplied html as-is return imageSourcesUpdated ? 
container.innerHTML : html; } - render() { - const item = this.props.item; - const html = this.getBodyHTML( - get(item, 'es_highlight.body_html.length', 0) > 0 ? - item.es_highlight.body_html[0] : - item.body_html - ); + clickClicked(event) { + if (event != null) { + const target = event.target; + if (target && target.tagName === 'A' && this.isLinkExternal(target.href)) { + event.preventDefault(); + event.stopPropagation(); + const nextWindow = window.open(target.href, '_blank', 'noopener'); - if (!html) { + if (nextWindow) { + nextWindow.opener = null; + } + } + } + } + + isLinkExternal(href) { + try { + const url = new URL(href); + return url.host !== window.location.host && ['http:', 'https:'].includes(url.protocol); + } catch (e) { + return false; + } + } + + copyClicked() { + this.props.reportCopy(this.props.item); + } + + addContextMenuEventListeners() { + const tree = this.bodyRef.current; + if (tree) { + tree.querySelectorAll('[data-disable-download="true"]').forEach((element) => { + element.addEventListener('contextmenu', this.preventContextMenu); + }); + } + } + + removeContextMenuEventListeners() { + const tree = this.bodyRef.current; + if (tree) { + tree.querySelectorAll('[data-disable-download="true"]').forEach((element) => { + element.removeEventListener('contextmenu', this.preventContextMenu); + }); + } + } + + preventContextMenu(event) { + event.preventDefault(); + } + + render() { + if (!this.state.sanitizedHtml) { return null; } return ( -
+
+
+
); } } @@ -252,4 +413,4 @@ const mapDispatchToProps = (dispatch) => ({ reportCopy: (item) => dispatch(selectCopy(item)) }); -export default connect(null, mapDispatchToProps)(ArticleBodyHtml); \ No newline at end of file +export default connect(null, mapDispatchToProps)(ArticleBodyHtml); diff --git a/dev-requirements.txt b/dev-requirements.txt index 541fde948..a1de46a22 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,7 +3,7 @@ flake8 sphinx sphinx-autobuild -pytest==3.10.0 +pytest==7.0.0 pytest-cov==2.6.1 pytest-mock==1.10.1 responses>=0.10.6,<0.11 diff --git a/features/news_api_auth.feature b/features/news_api_auth.feature index ccef347d8..4403bf4e9 100644 --- a/features/news_api_auth.feature +++ b/features/news_api_auth.feature @@ -22,7 +22,7 @@ Feature: News API Authorization "company" : "#companies._id#", "enabled" : true }] - """+ + """ When we save API token Given "products" """ @@ -118,7 +118,7 @@ Feature: News API Authorization "product_type": "news_api" }] """ - When we set header "X-Forwarded-For" to value "123.123.123.001, 192.192.192.192" + When we set header "X-Forwarded-For" to value "123.123.123.1, 192.192.192.192" When we get "news/search?q=fish&include_fields=body_html" Then we get list with 1 items """ diff --git a/features/news_api_syndicate.feature b/features/news_api_syndicate.feature new file mode 100755 index 000000000..14ca27283 --- /dev/null +++ b/features/news_api_syndicate.feature @@ -0,0 +1,162 @@ +Feature: News API Syndicate ATOM Search + + Background: Initial setup + Given "companies" + """ + [{"name": "Test Company", "is_enabled" : true}] + """ + Given "news_api_tokens" + """ + [{"company" : "#companies._id#", "enabled" : true}] + """ + When we save API token + + Scenario: Test atom of syndicate request response restricted by product + Given "items" + """ + [{"body_html": "Once upon a time there was a fish who could swim", "headline": "headline 1", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}, + 
{"body_html": "Once upon a time there was a aardvark that could not swim", "headline": "headline 2", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}] + """ + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "fish", + "product_type": "news_api" + }] + """ + When we get "syndicate?formatter=atom" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "don't get" "<![CDATA[headline 2]]>" in syndicate xml response + + Scenario: test syndicate atom request search q + Given "items" + """ + [{"body_html": "

Once upon a time there was a monkey who could swim

", "headline": "headline 1", + "byline": "S Smith", "pubstatus": "usable", "service" : [{"name" : "Australian General News", "code" : "a"}], + "description_text": "summary", + "associations" : { + "featuremedia" : { + "mimetype" : "image/jpeg", + "description_text" : "Deputy Prime Minister Michael McCormack during Question Time", + "version" : "1", + "byline" : "Mick Tsikas/AAP PHOTOS", + "body_text" : "QUESTION TIME ALT", + "renditions" : { + "16-9" : { + "href" : "/assets/5fc5dce16369ab07be3325fa", + "height" : 720, + "width" : 1280, + "media" : "5fc5dce16369ab07be3325fa", + "poi" : { + "x" : 453, + "y" : 335 + }, + "mimetype" : "image/jpeg" + } + } + }}, + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}] + """ + When we get "syndicate?formatter=atom&q=monkey" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "get" "Mick Tsikas/AAP PHOTOS" in syndicate xml response + + Scenario: test atom request with embedded image + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "fish", + "product_type": "news_api" + }, + {"name": "A pic product", + "decsription": "pic product", + "companies" : [ + "#companies._id#" + ], + "query": "", + "sd_product_id": "1", + "product_type": "news_api" + }] + """ + Given "items" + """ + [{"body_html": "

Once upon a time there was a fish who could swim

\"altSome caption
", + "headline": "headline 1", + "byline": "S Smith", "pubstatus": "usable", "service" : [{"name" : "Australian General News", "code" : "a"}], + "description_text": "summary", + "associations" : { + "editor_19" : { + "mimetype" : "image/jpeg", + "description_text" : "Deputy Prime Minister Michael McCormack during Question Time", + "version" : "1", + "byline" : "Mick Tsikas/AAP PHOTOS", + "body_text" : "QUESTION TIME ALT", + "products": [{"code": "1"}], + "renditions" : { + "16-9" : { + "href" : "/assets/5fc5dce16369ab07be3325fa", + "height" : 720, + "width" : 1280, + "media" : "5fc5dce16369ab07be3325fa", + "poi" : { + "x" : 453, + "y" : 335 + }, + "mimetype" : "image/jpeg" + } + } + }}, + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#"}] + """ + When we get "syndicate?formatter=atom" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "get" "5fc5dce16369ab07be3325fa" in atom xml response + Then we "get" "src="http://" in atom xml response + + Scenario: Atom request response restricted by featured image product + Given "items" + """ + [{"body_html": "Once upon a time there was a fish who could swim", "headline": "headline 1", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"featuremedia": {"products": [{"code": "1234"}], "renditions": {"original": {}} }}}, + {"body_html": "Once upon a time there was a aardvark that could not swim", "headline": "headline 2", + "firstpublished": "#DATE-1#", "versioncreated": "#DATE#", + "associations": {"featuremedia": {"products": [{"code": "4321"}], "renditions": {"original": {}} }}}] + """ + Given "products" + """ + [{"name": "A fishy Product", + "decsription": "a product for those interested in fish", + "companies" : [ + "#companies._id#" + ], + "query": "Once upon a time", + "product_type": "news_api" + }, + {"name": "A fishy superdesk product", + "description": "a superdesk product restricting images in the atom feed", + "companies" : [ + 
"#companies._id#" + ], + "sd_product_id": "1234", + "product_type": "news_api" + } + ] + """ + When we get "syndicate?formatter=atom" + Then we get OK response + Then we "get" "<![CDATA[headline 1]]>" in syndicate xml response + Then we "don't get" "<![CDATA[headline 2]]>" in syndicate xml response diff --git a/features/steps/steps.py b/features/steps/steps.py index 9bd4f6aeb..60342b2a8 100644 --- a/features/steps/steps.py +++ b/features/steps/steps.py @@ -17,6 +17,7 @@ from wooper.general import ( get_body ) +import logging @when('we save API token') @@ -74,3 +75,21 @@ def we_get_text_in_atom_xml_response(context, get, text): assert (text in get_body(context.response)) else: assert (text not in get_body(context.response)) + + +@then('we "{get}" "{text}" in syndicate xml response') +def we_get_text_in_syndicate_xml_response(context, get, text): + with context.app.test_request_context(context.app.config['URL_PREFIX']): + response_body = get_body(context.response) + logging.info("Response body: %s", response_body) + assert (isinstance(get_body(context.response), str)) + try: + tree = lxml.etree.fromstring(response_body.encode('utf-8')) + assert '{http://www.w3.org/2005/Atom}feed' == tree.tag + if get == 'get': + assert (text in response_body) + else: + assert (text not in response_body) + except lxml.etree.XMLSyntaxError as e: + logging.error("XML parsing error: %s", e) + raise AssertionError("Response is not valid XML") diff --git a/karma.conf.js b/karma.conf.js index d17325f2e..d8e96e201 100644 --- a/karma.conf.js +++ b/karma.conf.js @@ -13,6 +13,7 @@ module.exports = function(config) { 'assets/tests.js': ['webpack', 'sourcemap'], }, + webpack: { module: webpackConfig.module, resolve: webpackConfig.resolve, diff --git a/newsroom/companies/companies.py b/newsroom/companies/companies.py index 270744a79..71f48ec50 100644 --- a/newsroom/companies/companies.py +++ b/newsroom/companies/companies.py @@ -47,13 +47,66 @@ class CompaniesResource(newsroom.Resource): 
'archive_access': { 'type': 'boolean', }, + 'company_type': { + 'type': 'string', + 'nullable': True, + }, 'events_only': { 'type': 'boolean', 'default': False, }, - 'company_type': { - 'type': 'string', - 'nullable': True, + 'embedded': { + 'type': 'dict', + 'schema': { + 'video_display': { + 'type': 'boolean', + 'default': False, + }, + 'audio_display': { + 'type': 'boolean', + 'default': False, + }, + 'social_media_display': { + 'type': 'boolean', + 'default': False, + }, + 'images_display': { + 'type': 'boolean', + 'default': False, + }, + 'sdpermit_display': { + 'type': 'boolean', + 'default': False, + }, + 'all_display': { + 'type': 'boolean', + 'default': False, + }, + 'social_media_download': { + 'type': 'boolean', + 'default': False, + }, + 'video_download': { + 'type': 'boolean', + 'default': False, + }, + 'audio_download': { + 'type': 'boolean', + 'default': False, + }, + 'images_download': { + 'type': 'boolean', + 'default': False, + }, + 'sdpermit_download': { + 'type': 'boolean', + 'default': False, + }, + 'all_download': { + 'type': 'boolean', + 'default': False, + } + } }, 'account_manager': { 'type': 'string' diff --git a/newsroom/companies/views.py b/newsroom/companies/views.py index b80a78e4a..2144b7b7e 100644 --- a/newsroom/companies/views.py +++ b/newsroom/companies/views.py @@ -3,7 +3,7 @@ import flask from bson import ObjectId -from flask import jsonify, current_app as app +from flask import current_app as app from flask_babel import gettext from superdesk import get_resource_service from werkzeug.exceptions import NotFound @@ -13,6 +13,8 @@ from newsroom.utils import query_resource, find_one, get_entity_or_404, get_json_or_400, set_original_creator, \ set_version_creator import ipaddress +from flask import request, jsonify, current_app, session +import secrets def get_company_types_options(company_types): @@ -154,14 +156,45 @@ def update_products(updates, company_id): def update_company(data, _id): updates = {k: v for k, v in data.items() if 
k in ('sections', 'archive_access', 'events_only')} + + embedded_fields = [ + 'video_display', 'audio_display', 'social_media_display', 'images_display', 'sdpermit_display', 'all_display', + 'social_media_download', 'video_download', 'audio_download', 'images_download', 'sdpermit_download', + 'all_download' + ] + + if 'embedded' in data: + embedded_updates = {k: v for k, v in data['embedded'].items() if k in embedded_fields} + if embedded_updates: + updates['embedded'] = embedded_updates + get_resource_service('companies').patch(_id, updates=updates) @blueprint.route('/companies/<_id>/permissions', methods=['POST']) @account_manager_only def save_company_permissions(_id): + csrf_token = request.headers.get('X-CSRF-Token') + expected_csrf_token = session.pop('csrf_token') orig = get_entity_or_404(_id, 'companies') data = get_json_or_400() - update_products(data['products'], _id) - update_company(data, orig['_id']) - return jsonify(), 200 + if not csrf_token or csrf_token != expected_csrf_token: + current_app.logger.error("Permisson CSRF validation failed:") + return jsonify({"error": "Permisson CSRF token validation failed"}), 403 + + try: + update_products(data['products'], _id) + update_company(data, orig['_id']) + except Exception as e: + current_app.logger.error(f"Error updating company permissions: {str(e)}") + return jsonify({"error": "An error occurred while updating permissions"}), 500 + + return jsonify({"message": "Permissions updated successfully"}), 200 + + +@blueprint.route('/companies/get-csrf-token', methods=['GET']) +@account_manager_only +def get_csrf_token(): + csrf_token = secrets.token_hex(32) + session['csrf_token'] = csrf_token + return jsonify({'csrf_token': csrf_token}) diff --git a/newsroom/news_api/news/atom/atom.py b/newsroom/news_api/news/atom/atom.py deleted file mode 100644 index ebc49dad6..000000000 --- a/newsroom/news_api/news/atom/atom.py +++ /dev/null @@ -1,157 +0,0 @@ -import superdesk -import flask -from eve.methods.get import 
get_internal -from lxml import etree -from lxml.etree import SubElement -from superdesk.utc import utcnow -from flask import current_app as app, g -import datetime -import logging -from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls -from newsroom.wire.formatters.utils import remove_unpermissioned_embeds - -blueprint = superdesk.Blueprint('atom', __name__) - - -logger = logging.getLogger(__name__) - - -def init_app(app): - superdesk.blueprint(blueprint, app) - - -@blueprint.route('/atom', methods=['GET']) -@blueprint.route('/atom/', methods=['GET']) -def get_atom(token=None): - - def _format_date(date): - iso8601 = date.isoformat() - if date.tzinfo: - return iso8601 - return iso8601 + 'Z' - - def _format_update_date(date): - DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" - return date.strftime(DATETIME_FORMAT) + 'Z' - - auth = app.auth - if not auth.authorized([], None, flask.request.method): - if token: - if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'): - return auth.authenticate() - else: - return auth.authenticate() - - XML_ROOT = '' - - _message_nsmap = {None: 'http://www.w3.org/2005/Atom', 'dcterms': 'http://purl.org/dc/terms/', - 'media': 'http://search.yahoo.com/mrss/', - 'mi': 'http://schemas.ingestion.microsoft.com/common/'} - -# feed = etree.Element('feed', attrib={'lang': 'en-us'}, nsmap=_message_nsmap) - feed = etree.Element('feed', nsmap=_message_nsmap) - SubElement(feed, 'title').text = etree.CDATA('{} Atom Feed'.format(app.config['SITE_NAME'])) - SubElement(feed, 'updated').text = _format_update_date(utcnow()) - SubElement(SubElement(feed, 'author'), 'name').text = app.config['SITE_NAME'] - SubElement(feed, 'id').text = flask.url_for('atom.get_atom', _external=True) - SubElement(feed, 'link', attrib={'href': flask.url_for('atom.get_atom', _external=True), 'rel': 'self'}) - - response = get_internal('news/search') -# TODO allow products to be passed as arguments -# req = 
ParsedRequest() -# req.args = ImmutableMultiDict([('include_fields', 'associations')]) -# response = superdesk.get_resource_service('news/search').get(req=req, lookup=None) - - # for item in response.docs: - for item in response[0].get('_items'): - try: - complete_item = superdesk.get_resource_service('items').find_one(req=None, _id=item.get('_id')) - - # If featuremedia is not allowed for the company don't add the item - if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - if not check_featuremedia_association_permission(complete_item): - continue - remove_unpermissioned_embeds(complete_item, g.user, 'news_api') - - entry = SubElement(feed, 'entry') - - # If the item has any parents we use the id of the first, this should be constant throught the update - # history - if complete_item.get('ancestors') and len(complete_item.get('ancestors')): - SubElement(entry, 'id').text = complete_item.get('ancestors')[0] - else: - SubElement(entry, 'id').text = complete_item.get('_id') - - SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) - SubElement(entry, 'published').text = _format_date(complete_item.get('firstpublished')) - SubElement(entry, 'updated').text = _format_update_date(complete_item.get('versioncreated')) - if token: - SubElement(entry, 'link', attrib={'rel': 'self', 'href': flask.url_for('news/item.get_item', - item_id=item.get('_id'), - format='TextFormatter', - token=token, - _external=True)}) - else: - SubElement(entry, 'link', attrib={'rel': 'self', 'href': flask.url_for('news/item.get_item', - item_id=item.get('_id'), - format='TextFormatter', - _external=True)}) - - if complete_item.get('byline'): - name = complete_item.get('byline') - if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == complete_item.get( - 'source', '').lower(): - name = name + " - " + complete_item.get('source') - SubElement(SubElement(entry, 'author'), 'name').text = name - else: - 
SubElement(SubElement(entry, 'author'), 'name').text = complete_item.get('source') if complete_item.get( - 'source') else app.config['COPYRIGHT_HOLDER'] - - SubElement(entry, 'rights').text = complete_item.get('source', '') - - if complete_item.get('pubstatus') == 'usable': - SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ - 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()), - _format_date(utcnow() + datetime.timedelta(days=30))) - else: - # in effect a kill set the end date into the past - SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ - 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()), - _format_date(utcnow() - datetime.timedelta(days=30))) - - categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] - for category in categories: - SubElement(entry, 'category', attrib={'term': category.get('name')}) - - SubElement(entry, 'summary').text = etree.CDATA(complete_item.get('description_text', '')) - - update_embed_urls(complete_item, token) - - SubElement(entry, 'content', attrib={'type': 'html'}).text = etree.CDATA(complete_item.get('body_html', '')) - - if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions').get( - "16-9") - metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) - - url = flask.url_for('assets.get_item', _external=True, asset_id=image.get('media'), - token=token) if token else flask.url_for( - 'assets.get_item', _external=True, asset_id=image.get('media')) - - media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'), - attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) - - SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get('byline') - SubElement(media, etree.QName(_message_nsmap.get('media'), 
'title')).text = metadata.get( - 'description_text') - SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get('body_text') - focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(image.get('poi').get('x')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(image.get('poi').get('x')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(image.get('poi').get('y')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(image.get('poi').get('y')) - except Exception as ex: - logger.exception('processing {} - {}'.format(item.get('_id'), ex)) - - return flask.Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'), - mimetype='application/atom+xml') diff --git a/newsroom/news_api/news/rss/rss.py b/newsroom/news_api/news/rss/rss.py deleted file mode 100644 index af6a733f6..000000000 --- a/newsroom/news_api/news/rss/rss.py +++ /dev/null @@ -1,164 +0,0 @@ -import superdesk -import flask -from eve.methods.get import get_internal -from lxml import etree -from lxml.etree import SubElement -from superdesk.utc import utcnow -from flask import current_app as app, g -from email import utils -import datetime -import logging -from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls -from newsroom.wire.formatters.utils import remove_unpermissioned_embeds - -blueprint = superdesk.Blueprint('rss', __name__) - - -logger = logging.getLogger(__name__) - - -def init_app(app): - superdesk.blueprint(blueprint, app) - - -@blueprint.route('/rss', methods=['GET']) -@blueprint.route('/rss/', methods=['GET']) -def get_rss(token=None): - - def _format_date(date): - iso8601 = date.isoformat() - if date.tzinfo: - return iso8601 - return iso8601 + 'Z' - - def _format_date_2(date): - DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" - return 
date.strftime(DATETIME_FORMAT) + 'Z' - - def _format_date_3(date): - return utils.format_datetime(date) - - auth = app.auth - if not auth.authorized([], None, flask.request.method): - if token: - if not auth.check_auth(token, allowed_roles=None, resource=None, method='GET'): - return auth.authenticate() - else: - return auth.authenticate() - - XML_ROOT = '' - - _message_nsmap = {'dcterms': 'http://purl.org/dc/terms/', 'media': 'http://search.yahoo.com/mrss/', - 'dc': 'http://purl.org/dc/elements/1.1/', 'mi': 'http://schemas.ingestion.microsoft.com/common/', - 'content': 'http://purl.org/rss/1.0/modules/content/'} - -# feed = etree.Element('feed', attrib={'lang': 'en-us'}, nsmap=_message_nsmap) - feed = etree.Element('rss', attrib={'version': '2.0'}, nsmap=_message_nsmap) - channel = SubElement(feed, 'channel') - SubElement(channel, 'title').text = '{} RSS Feed'.format(app.config['SITE_NAME']) - SubElement(channel, 'description').text = '{} RSS Feed'.format(app.config['SITE_NAME']) - SubElement(channel, 'link').text = flask.url_for('rss.get_rss', _external=True) - - response = get_internal('news/search') -# req = ParsedRequest() -# req.args = {'include_fields': 'abstract'} -# response = superdesk.get_resource_service('news/search').get(req=req, lookup=None) - - for item in response[0].get('_items'): - try: - complete_item = superdesk.get_resource_service('items').find_one(req=None, _id=item.get('_id')) - - # If featuremedia is not allowed for the company don't add the item - if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - if not check_featuremedia_association_permission(complete_item): - continue - remove_unpermissioned_embeds(complete_item, g.user, 'news_api') - - entry = SubElement(channel, 'item') - - # If the item has any parents we use the id of the first, this should be constant throught the update - # history - if complete_item.get('ancestors') and len(complete_item.get('ancestors')): - SubElement(entry, 
'guid').text = complete_item.get('ancestors')[0] - else: - SubElement(entry, 'guid').text = complete_item.get('_id') - - SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) - SubElement(entry, 'pubDate').text = _format_date_3(complete_item.get('firstpublished')) - SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'modified')).text = _format_date_2( - complete_item.get('versioncreated')) - if token: - SubElement(entry, 'link').text = flask.url_for('news/item.get_item', - item_id=item.get('_id'), - format='TextFormatter', - token=token, - _external=True) - else: - SubElement(entry, 'link').text = flask.url_for('news/item.get_item', - item_id=item.get('_id'), - format='TextFormatter', - _external=True) - - if complete_item.get('byline'): - name = complete_item.get('byline') - if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == complete_item.get( - 'source', '').lower(): - name = name + " - " + complete_item.get('source') - SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = name - else: - SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = \ - complete_item.get('source') if complete_item.get('source') else app.config['COPYRIGHT_HOLDER'] - - SubElement(entry, 'source', attrib={'url': flask.url_for('rss.get_rss', _external=True)}).text = \ - complete_item.get('source', '') - - if complete_item.get('pubstatus') == 'usable': - SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ - 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()), - _format_date(utcnow() + datetime.timedelta(days=30))) - else: - # in effect a kill set the end date into the past - SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ - 'start={}; end={}; scheme=W3C-DTF'.format(_format_date(utcnow()), - _format_date(utcnow() - datetime.timedelta(days=30))) - - categories = [{'name': s.get('name')} for s in 
complete_item.get('service', [])] \ - + [{'name': s.get('name')} for s in complete_item.get('subject', [])] \ - + [{'name': s.get('name')} for s in complete_item.get('place', [])] \ - + [{'name': k} for k in complete_item.get('keywords', [])] - for category in categories: - SubElement(entry, 'category').text = category.get('name') - - SubElement(entry, 'description').text = etree.CDATA(complete_item.get('description_text', '')) - - update_embed_urls(complete_item, token) - - SubElement(entry, etree.QName(_message_nsmap.get('content'), 'encoded')).text = etree.CDATA( - complete_item.get('body_html', '')) - - if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): - image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions').get( - "16-9") - metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) - - url = flask.url_for('assets.get_item', _external=True, asset_id=image.get('media'), - token=token) if token else flask.url_for( - 'assets.get_item', _external=True, asset_id=image.get('media')) - - media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'), - attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) - - SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get('byline') - SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get( - 'description_text') - SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get('body_text') - focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str(image.get('poi').get('x')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str(image.get('poi').get('x')) - SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str(image.get('poi').get('y')) - SubElement(focr, 
etree.QName(_message_nsmap.get('mi'), 'y2')).text = str(image.get('poi').get('y')) - except Exception as ex: - logger.exception('processing {} - {}'.format(item.get('_id'), ex)) - - return flask.Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'), - mimetype='application/rss+xml') diff --git a/newsroom/news_api/news/syndicate/__init__.py b/newsroom/news_api/news/syndicate/__init__.py new file mode 100644 index 000000000..4cf192da9 --- /dev/null +++ b/newsroom/news_api/news/syndicate/__init__.py @@ -0,0 +1,44 @@ +import superdesk +import logging +from flask import request, make_response, jsonify +from eve.methods.get import get_internal +from .error_handlers import process_error_response +from .auth import authenticate +from .syndicate_handlers import FORMAT_HANDLERS, FEED_GENERATORS as FORMAT_HANDLERS_INIT +from .resource import NewsAPISyndicateResource +from .service import NewsAPISyndicateService +from werkzeug.routing import BaseConverter + +syndicate_blueprint = superdesk.Blueprint('syndicate', __name__) + +logger = logging.getLogger(__name__) + + +class RegExConverter(BaseConverter): + def __init__(self, map, regex='[^/]+'): + super().__init__(map) + self.regex = regex + + +@syndicate_blueprint.route('/', methods=['GET']) +@syndicate_blueprint.route('//', methods=['GET']) +@authenticate +def get_syndicate_feed(syndicate_type, token=None): + response = get_internal('news/syndicate') + format_param = request.args.get('formatter') + if format_param: + format_param = format_param.upper().strip() + try: + return FORMAT_HANDLERS[format_param]['handler'](response[0], format_param) + except ValueError as e: + error_message = f"An error occurred in converting response to {format_param}: {e}" + error_response = make_response(jsonify({'error': error_message}), 400) + return process_error_response(error_response) + + return FORMAT_HANDLERS_INIT[syndicate_type.lower()](response[0]) + + +def init_app(app): + 
superdesk.register_resource('news/syndicate', NewsAPISyndicateResource, NewsAPISyndicateService, _app=app) + app.url_map.converters['regex'] = RegExConverter + superdesk.blueprint(syndicate_blueprint, app) diff --git a/newsroom/news_api/news/syndicate/auth.py b/newsroom/news_api/news/syndicate/auth.py new file mode 100644 index 000000000..ed0d2074d --- /dev/null +++ b/newsroom/news_api/news/syndicate/auth.py @@ -0,0 +1,18 @@ +from functools import wraps +from flask import current_app as app +import flask + + +def authenticate(func): + @wraps(func) + def wrapper(*args, **kwargs): + auth = app.auth + token = kwargs.get('token') + if not auth.authorized([], None, flask.request.method): + if token: + if not auth.check_auth(token, allowed_roles=None, resource=None, method=flask.request.method): + return auth.authenticate() + else: + return auth.authenticate() + return func(*args, **kwargs) + return wrapper diff --git a/newsroom/news_api/news/syndicate/error_handlers.py b/newsroom/news_api/news/syndicate/error_handlers.py new file mode 100644 index 000000000..e53efe803 --- /dev/null +++ b/newsroom/news_api/news/syndicate/error_handlers.py @@ -0,0 +1,54 @@ +from typing import Union, Mapping, Dict +from flask import request, make_response, jsonify + + +def handle_unsupported_format(data, formatter=None): + error_message = f"Unsupported formatter: {formatter if formatter is not None else ''} " + error_response = make_response(jsonify({'error': error_message}), 400) + return process_error_response(error_response) + + +def process_error_response(response): + error_message: Union[bytes, str] = response.data.decode( + 'utf-8') if response.data else 'error message empty,contact admin for log information' + + def syndicate_examples() -> Mapping[str, str]: + examples = { + 'json': ( + f"{request.url_root}syndicate?format=json&q=trump&start_date=2020-04-01" + f"&timezone=Australia/Sydney" + ), + 'atom': ( + f"{request.url_root}syndicate?format=atom&start_date=now-30d&end_date=now" 
+ f"&timezone=Australia/Sydney&include_fields=headline,byline,slugline,description_html," + f"located,keywords,source,subject,place,wordcount,charcount,body_html,readtime,profile," + f"service,genre,associations" + ), + 'rss': ( + f"{request.url_root}syndicate?format=rss&exclude_fields=version,versioncreated," + f"firstcreated" + ) + } + return examples + + def syndicate_parameters() -> Dict[str, str]: + parameters = { + 'format': "Specifies the desired format of the response. Accepts 'json', 'atom', or 'rss'.", + # ... (other parameters) ... + } + return parameters + + error_payload: Dict[str, Dict[str, Union[int, str, Dict[str, str], Mapping[str, str]]]] = { + "error": { + "code": response.status_code, + "message": error_message, + }, + "usage": { + "endpoint": str(request.url), + "method": request.method, + "description": "This API endpoint allows formats (JSON, ATOM, RSS).", + "parameters": syndicate_examples(), + "examples": syndicate_parameters(), + }, + } + return jsonify(error_payload) diff --git a/newsroom/news_api/news/syndicate/resource.py b/newsroom/news_api/news/syndicate/resource.py new file mode 100644 index 000000000..b37e32d23 --- /dev/null +++ b/newsroom/news_api/news/syndicate/resource.py @@ -0,0 +1,12 @@ +from newsroom import Resource + + +class NewsAPISyndicateResource(Resource): + resource_title = 'News Syndicate' + datasource = { + 'search_backend': 'elastic', + 'source': 'items', + } + + item_methods = [] + resource_methods = [] diff --git a/newsroom/news_api/news/syndicate/service.py b/newsroom/news_api/news/syndicate/service.py new file mode 100644 index 000000000..1f4934ff2 --- /dev/null +++ b/newsroom/news_api/news/syndicate/service.py @@ -0,0 +1,335 @@ +from content_api.errors import BadParameterValueError +from newsroom.news_api.news.search_service import NewsAPINewsService +from superdesk import get_resource_service +from lxml import etree +from lxml.etree import SubElement +from superdesk.utc import utcnow +from flask import 
current_app as app, g, Response, url_for +import logging +from newsroom.news_api.utils import check_featuremedia_association_permission, update_embed_urls +from newsroom.wire.formatters.utils import remove_unpermissioned_embeds +from datetime import timedelta +from email import utils + + +class NewsAPISyndicateService(NewsAPINewsService): + allowed_params = { + 'start_date', 'end_date', + 'include_fields', 'exclude_fields', + 'max_results', 'page_size', 'page', 'timezone', + 'version', 'where', 'item_source', + 'q', 'default_operator', 'filter', + 'service', 'subject', 'genre', 'urgency', + 'priority', 'type', 'item_source', 'timezone', 'products', + 'exclude_ids', 'formatter', 'sort' + } + default_sort = [{'versioncreated': 'asc'}] + + allowed_exclude_fields = {'version', 'firstcreated', 'headline', 'byline', 'slugline'} + + def on_fetched(self, doc): + self._enhance_hateoas(doc) + super().on_fetched(doc) + + def _enhance_hateoas(self, doc): + doc.setdefault('_links', {}) + doc['_links']['parent'] = { + 'title': 'Home', + 'href': '/' + }, + self._hateoas_set_item_links(doc) + + def _hateoas_set_item_links(self, doc): + for item in doc.get('_items') or []: + doc_id = str(item['_id']) + item.setdefault('_links', {}) + item['_links']['self'] = { + 'href': 'news/item/{}'.format(doc_id), + 'title': 'News Item' + } + item.pop('_updated', None) + item.pop('_created', None) + item.pop('_etag', None) + + def prefill_search_query(self, search, req=None, lookup=None): + super().prefill_search_query(search, req, lookup) + + if search.args.get('exclude_ids'): + search.args['exclude_ids'] = search.args['exclude_ids'].split(',') + + try: + search.args['max_results'] = int(search.args.get('max_results') or 200) + except ValueError: + raise BadParameterValueError('Max Results must be a number') + + search.args['size'] = search.args['max_results'] + + @staticmethod + def _format_date(date): + iso8601 = date.isoformat() + if date.tzinfo: + return iso8601 + return iso8601 + 'Z' + + 
@staticmethod + def _format_update_date(date): + DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" + return date.strftime(DATETIME_FORMAT) + 'Z' + + @staticmethod + def _format_date_publish(date): + return utils.format_datetime(date) + + @staticmethod + def generate_atom_feed(response, token=None): + XML_ROOT = '' + _message_nsmap = {None: 'http://www.w3.org/2005/Atom', 'dcterms': 'http://purl.org/dc/terms/', + 'media': 'http://search.yahoo.com/mrss/', + 'mi': 'http://schemas.ingestion.microsoft.com/common/'} + + feed = etree.Element('feed', nsmap=_message_nsmap) + SubElement(feed, 'title').text = etree.CDATA('{} Atom Feed'.format(app.config['SITE_NAME'])) + SubElement(feed, 'updated').text = __class__._format_update_date(utcnow()) + SubElement(SubElement(feed, 'author'), 'name').text = app.config['SITE_NAME'] + feed_url = url_for('syndicate.get_syndicate_feed', + syndicate_type='syndicate', + _external=True, + formatter='atom') + + SubElement(feed, 'id').text = feed_url + SubElement(feed, 'link', + attrib={'href': feed_url, 'rel': 'self'}) + item_resource = get_resource_service('items') + image = None + for item in response['_items']: + try: + complete_item = item_resource.find_one(req=None, _id=item.get('_id')) + # If featuremedia is not allowed for the company don't add the item + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + if not check_featuremedia_association_permission(complete_item): + continue + remove_unpermissioned_embeds(complete_item, g.user, 'news_api') + entry = SubElement(feed, 'entry') + # If the item has any parents we use the id of the first, this should be constant throught the update + # history + if complete_item.get('ancestors') and len(complete_item.get('ancestors')): + SubElement(entry, 'id').text = complete_item.get('ancestors')[0] + else: + SubElement(entry, 'id').text = complete_item.get('_id') + + SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) + SubElement(entry, 
'published').text = __class__._format_date(complete_item.get('firstpublished')) + SubElement(entry, 'updated').text = __class__._format_update_date(complete_item.get('versioncreated')) + if token: + SubElement(entry, 'link', attrib={'rel': 'self', 'href': url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + token=token, + _external=True)}) + else: + SubElement(entry, 'link', attrib={'rel': 'self', 'href': url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + _external=True)}) + + if complete_item.get('byline'): + name = complete_item.get('byline') + if complete_item.get('source') and not app.config['COPYRIGHT_HOLDER'].lower() == complete_item.get( + 'source', '').lower(): + name = name + " - " + complete_item.get('source') + SubElement(SubElement(entry, 'author'), 'name').text = name + else: + SubElement(SubElement(entry, 'author'), 'name').text = complete_item.get( + 'source') if complete_item.get( + 'source') else app.config['COPYRIGHT_HOLDER'] + + SubElement(entry, 'rights').text = complete_item.get('source', '') + + if complete_item.get('pubstatus') == 'usable': + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date(utcnow()), + __class__._format_date(utcnow() + timedelta(days=30))) + else: + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date(utcnow()), + __class__._format_date(utcnow() - timedelta(days=30))) + + categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] + for category in categories: + SubElement(entry, 'category', attrib={'term': category.get('name')}) + + SubElement(entry, 'summary').text = etree.CDATA(complete_item.get('description_text', '')) + update_embed_urls(complete_item, token) + SubElement(entry, 'content', attrib={'type': 'html'}).text = etree.CDATA( + 
complete_item.get('body_html', '')) + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions').get( + "16-9") + if image: + metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) + + url = url_for('assets.get_item', _external=True, asset_id=image.get('media'), + token=token) if token else url_for( + 'assets.get_item', _external=True, asset_id=image.get('media')) + + media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'), + attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) + + SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get( + 'byline') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get( + 'description_text') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get( + 'body_text') + if image.get('poi'): + focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str( + image.get('poi').get('y')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str( + image.get('poi').get('y')) + except Exception as ex: + __class__.handle_exception(item, ex) + continue + return Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'), + mimetype='application/atom+xml') + + @staticmethod + def generate_rss_feed(response, token=None): + XML_ROOT = '' + + _message_nsmap = {'dcterms': 'http://purl.org/dc/terms/', 'media': 'http://search.yahoo.com/mrss/', + 'dc': 'http://purl.org/dc/elements/1.1/', + 'mi': 
'http://schemas.ingestion.microsoft.com/common/', + 'content': 'http://purl.org/rss/1.0/modules/content/'} + + feed = etree.Element('rss', attrib={'version': '2.0'}, nsmap=_message_nsmap) + channel = SubElement(feed, 'channel') + SubElement(channel, 'title').text = '{} RSS Feed'.format(app.config['SITE_NAME']) + SubElement(channel, 'description').text = '{} RSS Feed'.format(app.config['SITE_NAME']) + feed_url = url_for('syndicate.get_syndicate_feed', + syndicate_type='syndicate', + _external=True, + formatter='rss') + SubElement(channel, 'link').text = feed_url + item_resource = get_resource_service('items') + image = None + for item in response['_items']: + try: + complete_item = item_resource.find_one(req=None, _id=item.get('_id')) + + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + if not check_featuremedia_association_permission(complete_item): + continue + remove_unpermissioned_embeds(complete_item, g.user, 'news_api') + + entry = SubElement(channel, 'item') + if complete_item.get('ancestors') and len(complete_item.get('ancestors')): + SubElement(entry, 'guid').text = complete_item.get('ancestors')[0] + else: + SubElement(entry, 'guid').text = complete_item.get('_id') + + SubElement(entry, 'title').text = etree.CDATA(complete_item.get('headline')) + SubElement(entry, 'pubDate').text = __class__._format_date_publish(complete_item.get('firstpublished')) + SubElement(entry, + etree.QName(_message_nsmap.get('dcterms'), 'modified')).text = __class__._format_update_date( + complete_item.get('versioncreated')) + if token: + SubElement(entry, 'link').text = url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + token=token, + _external=True) + else: + SubElement(entry, 'link').text = url_for('news/item.get_item', + item_id=item.get('_id'), + format='TextFormatter', + _external=True) + + if complete_item.get('byline'): + name = complete_item.get('byline') + if complete_item.get('source') and 
not app.config[ + 'COPYRIGHT_HOLDER'].lower() == complete_item.get( + 'source', '').lower(): + name = name + " - " + complete_item.get('source') + SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = name + else: + SubElement(entry, etree.QName(_message_nsmap.get('dc'), 'creator')).text = \ + complete_item.get('source') if complete_item.get('source') else app.config[ + 'COPYRIGHT_HOLDER'] + + SubElement(entry, 'source', + attrib={'url': feed_url}).text = \ + complete_item.get('source', '') + + if complete_item.get('pubstatus') == 'usable': + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date_publish( + complete_item.get('firstpublished')), + __class__._format_date( + utcnow() + timedelta(days=30))) + else: + # in effect a kill set the end date into the past + SubElement(entry, etree.QName(_message_nsmap.get('dcterms'), 'valid')).text = \ + 'start={}; end={}; scheme=W3C-DTF'.format(__class__._format_date(utcnow()), + __class__._format_date( + utcnow() - timedelta(days=30))) + + categories = [{'name': s.get('name')} for s in complete_item.get('service', [])] \ + + [{'name': s.get('name')} for s in complete_item.get('subject', [])] \ + + [{'name': s.get('name')} for s in complete_item.get('place', [])] \ + + [{'name': k} for k in complete_item.get('keywords', [])] + for category in categories: + SubElement(entry, 'category').text = category.get('name') + + SubElement(entry, 'description').text = etree.CDATA(complete_item.get('description_text', '')) + + update_embed_urls(complete_item, token) + + SubElement(entry, etree.QName(_message_nsmap.get('content'), 'encoded')).text = etree.CDATA( + complete_item.get('body_html', '')) + + if ((complete_item.get('associations') or {}).get('featuremedia') or {}).get('renditions'): + image = ((complete_item.get('associations') or {}).get('featuremedia') or {}).get( + 'renditions').get( + "16-9") + if image: + 
metadata = ((complete_item.get('associations') or {}).get('featuremedia') or {}) + + url = url_for('assets.get_item', _external=True, asset_id=image.get('media'), + token=token) if token else url_for( + 'assets.get_item', _external=True, asset_id=image.get('media')) + + media = SubElement(entry, etree.QName(_message_nsmap.get('media'), 'content'), + attrib={'url': url, 'type': image.get('mimetype'), 'medium': 'image'}) + + SubElement(media, etree.QName(_message_nsmap.get('media'), 'credit')).text = metadata.get( + 'byline') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'title')).text = metadata.get( + 'description_text') + SubElement(media, etree.QName(_message_nsmap.get('media'), 'text')).text = metadata.get( + 'body_text') + if image.get('poi'): + focr = SubElement(media, etree.QName(_message_nsmap.get('mi'), 'focalRegion')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x1')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'x2')).text = str( + image.get('poi').get('x')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y1')).text = str( + image.get('poi').get('y')) + SubElement(focr, etree.QName(_message_nsmap.get('mi'), 'y2')).text = str( + image.get('poi').get('y')) + except Exception as ex: + __class__.handle_exception(item, ex) + continue + return Response(XML_ROOT + etree.tostring(feed, method='xml', pretty_print=True).decode('utf-8'), + mimetype='application/rss+xml') + + @staticmethod + def handle_exception(item, ex): + item_id = item.get('_id') + log_message = f"Processing {item_id} - {str(ex)}" + logging.exception(log_message) diff --git a/newsroom/news_api/news/syndicate/syndicate_handlers.py b/newsroom/news_api/news/syndicate/syndicate_handlers.py new file mode 100644 index 000000000..934449abe --- /dev/null +++ b/newsroom/news_api/news/syndicate/syndicate_handlers.py @@ -0,0 +1,41 @@ +from collections import defaultdict +from .service import NewsAPISyndicateService 
def convert_to_syndicate(data, formatter):
    """Render a search response in the requested syndication format.

    :param data: search response payload (dict)
    :param formatter: 'ATOM', 'RSS' or 'JSON' (upper-cased by the caller)
    :raises ValueError: if the formatter is not one of the supported values
    """
    # No auth token is forwarded on purpose: generated feed URLs must not
    # leak credentials (original note: "remove token from requirements").
    # The original `formatter and formatter == 'X'` guards were redundant —
    # a falsy formatter falls through to the ValueError either way.
    if formatter == 'ATOM':
        return NewsAPISyndicateService.generate_atom_feed(data)
    if formatter == 'RSS':
        return NewsAPISyndicateService.generate_rss_feed(data)
    if formatter == 'JSON':
        return jsonify(data)
    raise ValueError("Invalid formatter specified")


def handle_unsupported_format(data, formatter=None):
    """Fallback handler: pass JSON through, answer anything else with a 400."""
    if formatter and formatter != 'JSON':
        # `formatter` is known truthy here, so the original
        # `formatter if formatter is not None else 'empty value'` was dead code.
        error_message = f"Unsupported formatter: {formatter} "
        error_response = make_response(jsonify({'error': error_message}), 400)
        return process_error_response(error_response)
    return jsonify(data)


# Maps the `formatter` query parameter to a handler and response content type.
# Defined after the handler functions so every referenced name is bound
# eagerly (the original relied on the defaultdict factory deferring lookup).
FORMAT_HANDLERS = defaultdict(
    lambda: {'handler': handle_unsupported_format, 'content_type': 'application/json'},
    {
        'ATOM': {'handler': convert_to_syndicate, 'content_type': 'application/xml'},
        'RSS': {'handler': convert_to_syndicate, 'content_type': 'application/xml'},
        'JSON': {'handler': convert_to_syndicate, 'content_type': 'application/json'},
    }
)

# Maps the `syndicate_type` URL segment to a feed generator.
FEED_GENERATORS = defaultdict(
    lambda: handle_unsupported_format,
    {
        'atom': NewsAPISyndicateService.generate_atom_feed,
        'rss': NewsAPISyndicateService.generate_rss_feed,
    }
)
'newsroom.news_api.news.rss.rss', 'newsroom.history' ] diff --git a/newsroom/static/poster_default.jpg b/newsroom/static/poster_default.jpg new file mode 100644 index 000000000..c4e4e7bdd Binary files /dev/null and b/newsroom/static/poster_default.jpg differ diff --git a/newsroom/upload.py b/newsroom/upload.py index 3d179b6b7..6da0c9f46 100644 --- a/newsroom/upload.py +++ b/newsroom/upload.py @@ -1,21 +1,36 @@ - import flask import newsroom import bson.errors from werkzeug.wsgi import wrap_file +from werkzeug.http import parse_range_header from werkzeug.utils import secure_filename from flask import request, url_for, current_app as newsroom_app from superdesk.upload import upload_url as _upload_url from superdesk import get_resource_service from newsroom.decorator import login_required - cache_for = 3600 * 24 * 7 # 7 days cache ASSETS_RESOURCE = 'upload' blueprint = flask.Blueprint(ASSETS_RESOURCE, __name__) +class MediaFileLoader: + _loaded_files = {} + + @classmethod + def get_media_file(cls, media_id): + if media_id in cls._loaded_files: + return cls._loaded_files[media_id] + + media_file = flask.current_app.media.get(media_id, ASSETS_RESOURCE) + + if media_file and 'video' in media_file.content_type: + cls._loaded_files[media_id] = media_file + + return media_file + + def get_file(key): file = request.files.get(key) if file: @@ -27,19 +42,68 @@ def get_file(key): @blueprint.route('/assets/', methods=['GET']) @login_required def get_upload(media_id): + is_safari = ('Safari' in request.headers.get('User-Agent', '') and 'Chrome' + not in request.headers.get('User-Agent', '')) try: - media_file = flask.current_app.media.get(media_id, ASSETS_RESOURCE) + if is_safari: + media_file = flask.current_app.media.get(media_id, ASSETS_RESOURCE) + else: + media_file = MediaFileLoader.get_media_file(media_id) except bson.errors.InvalidId: media_file = None if not media_file: - flask.abort(404) - - data = wrap_file(flask.request.environ, media_file, buffer_size=1024 * 256) - 
response = flask.current_app.response_class( - data, - mimetype=media_file.content_type, - direct_passthrough=True) - response.content_length = media_file.length + flask.abort(404, description="File not found") + + file_size = media_file.length + content_type = media_file.content_type or 'application/octet-stream' + range_header = request.headers.get('Range') + if not is_safari and range_header: + try: + ranges = parse_range_header(range_header) + if ranges and len(ranges.ranges) == 1: + start, end = ranges.ranges[0] + if start is None: + flask.abort(416, description="Invalid range header") + if end is None or end >= file_size: + end = file_size - 1 + length = end - start + 1 + + def range_generate(): + media_file.seek(start) + remaining = length + chunk_size = 8192 + while remaining: + chunk = media_file.read(min(chunk_size, remaining)) + if not chunk: + break + remaining -= len(chunk) + yield chunk + + response = flask.Response( + flask.stream_with_context(range_generate()), + 206, + mimetype=content_type, + direct_passthrough=True, + ) + response.headers.add('Content-Range', f'bytes {start}-{end}/{file_size}') + response.headers.add('Accept-Ranges', 'bytes') + response.headers.add('Content-Length', str(length)) + else: + flask.abort(416, description="Requested range not satisfiable") + except ValueError: + flask.abort(400, description="Invalid range header") + else: + data = wrap_file(flask.request.environ, media_file, buffer_size=1024 * 256) + response = flask.current_app.response_class( + data, + mimetype=media_file.content_type, + direct_passthrough=True) + response.content_length = media_file.length + + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS' + response.headers.pop('Content-Disposition', None) + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' response.last_modified = media_file.upload_date response.set_etag(media_file.md5) 
response.cache_control.max_age = cache_for @@ -47,15 +111,17 @@ def get_upload(media_id): response.cache_control.public = True response.make_conditional(flask.request) - if flask.request.args.get('filename'): - response.headers['Content-Type'] = media_file.content_type - response.headers['Content-Disposition'] = 'attachment; filename="%s"' % flask.request.args['filename'] + if request.args.get('filename'): + response.headers['Content-Disposition'] = f'attachment; filename="{request.args["filename"]}"' else: response.headers['Content-Disposition'] = 'inline' item_id = request.args.get('item_id') if item_id: - get_resource_service('history').log_media_download(item_id, media_id) + try: + get_resource_service('history').log_media_download(item_id, media_id) + except Exception as e: + newsroom_app.logger.error(f"Error logging media download: {str(e)}") return response diff --git a/newsroom/wire/block_media/company_factory.py b/newsroom/wire/block_media/company_factory.py new file mode 100644 index 000000000..cc8d522f1 --- /dev/null +++ b/newsroom/wire/block_media/company_factory.py @@ -0,0 +1,84 @@ +import time +from flask import session, g +from superdesk import get_resource_service + + +class CompanyFactory: + _company_cache = {} + _cache_expiration_time = 30 + + @staticmethod + def get_user_company(user): + current_time = time.time() + if not user.get('company'): + return [] + if user and user.get('company') in CompanyFactory._company_cache: + cached_data = CompanyFactory._company_cache[user['company']] + if current_time - cached_data['timestamp'] < CompanyFactory._cache_expiration_time: + return cached_data['company'] + + company = get_resource_service('companies').find_one(req=None, _id=user['company']) + if company: + CompanyFactory._company_cache[user['company']] = { + 'company': company, + 'timestamp': current_time + } + CompanyFactory._update_embedded_data_in_session(user, company) + return company + + company = 
get_resource_service('companies').find_one(req=None, _id=g.user) if hasattr(g, 'user') else None + if company: + CompanyFactory._company_cache[g.user] = { + 'company': company, + 'timestamp': current_time + } + CompanyFactory._update_embedded_data_in_session(g.user, company) + return company + + @staticmethod + def get_embedded_data(user): + company = CompanyFactory.get_user_company(user) + if not company: + return { + "embedded": { + "social_media_display": False, + "video_display": False, + "audio_display": False, + "images_display": False, + "all_display": True, + "social_media_download": False, + "video_download": False, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": False, + "sdpermit_download": False + } + } + + embedded = session.get(f"embedded_data_{user['company']}", {}) + + if embedded != company.get("embedded", {}): + CompanyFactory._update_embedded_data_in_session(user, company) + embedded = company.get("embedded", {}) + + return embedded + + @staticmethod + def _update_embedded_data_in_session(user, company): + session[f"embedded_data_{user['company']}"] = company.get("embedded", { + "social_media_display": False, + "video_display": False, + "audio_display": False, + "images_display": False, + "all_display": True, + "social_media_download": False, + "video_download": False, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": False, + "sdpermit_download": False + }) + session.permanent = False + session.modified = True diff --git a/newsroom/wire/block_media/download_items.py b/newsroom/wire/block_media/download_items.py new file mode 100644 index 000000000..765476d60 --- /dev/null +++ b/newsroom/wire/block_media/download_items.py @@ -0,0 +1,105 @@ +from newsroom.auth import get_user +from newsroom.wire.block_media.company_factory import CompanyFactory +from newsroom.wire.block_media.filter_media import get_allowed_tags + +from lxml import html as 
lxml_html +import re +import logging +logger = logging.getLogger(__name__) + + +def filter_items_download(func): + """ + A decorator that filters downloaded items based on a given filter function. + + :param func: The function to be decorated. It should take _ids and item_type as parameters + and return a list of items. + :return: A wrapper function that adds filtering capability to the decorated function. + """ + def wrapper(_ids, item_type, filter_func=None): + """ + Wrapper function that calls the decorated function and applies optional filtering. + + :param _ids: List of IDs to download items for. + :param item_type: Type of items to download . + :param filter_func: Optional function to filter the downloaded items. + default is None, no filtering is applied. + :return: A list of downloaded items, potentially filtered if a filter_func is provided + and the item_type is not 'agenda'. + """ + items = func(_ids, item_type) + if filter_func and items and (item_type != 'agenda'): + items = filter_func(items) + return items + return wrapper + + +def block_items_by_embedded_data(items): + def remove_editors_media(item, allowed_tags): + associations = item.get("associations") + if associations: + editors_to_remove = [] + allowed_tags = ['picture' if tag == 'img' else tag for tag in allowed_tags] + for key, value in associations.items(): + if key.startswith("editor_") and ((value and value.get("type") not in allowed_tags)): + editors_to_remove.append(key) + + for editor in editors_to_remove: + associations.pop(editor, None) + + item["associations"] = associations + return item + + download_social_tag = False + user = get_user(required=True) + embedded_data = CompanyFactory.get_embedded_data(user) + embedded_tags = get_allowed_tags(embedded_data) + allowed_tags = embedded_tags['download_tags'] + if 'all' in allowed_tags or (not any(allowed_tags)): + allowed_tags = ['video', 'audio', 'img', 'social_media'] + download_social_tag = True + if 'social_media' in allowed_tags: + 
download_social_tag = True + filtered_items = [] + for item in items: + html_updated = False + root_elem = lxml_html.fromstring(item.get('body_html', '')) + + if allowed_tags: + tag_map = {'video': 'Video', 'audio': 'Audio', 'img': 'Image', 'social_media': 'social_media'} + excluded_tags = set(tag_map.keys()) - set(allowed_tags) + regex_parts = [tag_map[tag] for tag in excluded_tags] + regex = rf" EMBED START (?:{'|'.join(regex_parts)}) {{id: \"editor_([0-9]+)" + comments = root_elem.xpath('//comment()') + for comment in comments: + m = re.search(regex, comment.text) + if m and m.group(1): + figure = comment.getnext() + for elem in figure.iterchildren(): + if (elem.tag in excluded_tags + and ('data-disable-download' not in elem.attrib + or elem.attrib['data-disable-download'] != 'true')): + elem.attrib['data-disable-download'] = 'true' + html_updated = True + break + if not download_social_tag: + social_media_embeds = root_elem.xpath('//div[@class="embed-block"]') + for social_media_embed in social_media_embeds: + if 'disabled-embed' not in social_media_embed.attrib.get('class', ''): + social_media_embed.attrib['class'] = social_media_embed.attrib.get('class', '') + ' disabled-embed' + blockquote_elements = social_media_embed.xpath('.//blockquote') + for blockquote in blockquote_elements: + if 'data-disable-download' not in blockquote.attrib: + blockquote.attrib['data-disable-download'] = 'true' + html_updated = True + break + + if html_updated: + for elem in root_elem.xpath('//*[@data-disable-download="true"]'): + elem.getparent().remove(elem) + item["body_html"] = lxml_html.tostring(root_elem, encoding='unicode', method="html") + + item_remove = remove_editors_media(item, allowed_tags) + filtered_items.append(item_remove) + + return filtered_items diff --git a/newsroom/wire/block_media/filter_htmlpackage.py b/newsroom/wire/block_media/filter_htmlpackage.py new file mode 100644 index 000000000..6fe5761c7 --- /dev/null +++ 
b/newsroom/wire/block_media/filter_htmlpackage.py @@ -0,0 +1,14 @@ +from functools import wraps +import flask +from newsroom.auth import get_user +from newsroom.wire.block_media.company_factory import CompanyFactory + + +def filter_embedded_data(func): + @wraps(func) + def wrapper(self, item, item_type='items'): + embedded_data = CompanyFactory.get_embedded_data(get_user(required=True)) + if any(embedded_data): + return str.encode(flask.render_template('download_embed.html', item=item), 'utf-8') + return func(self, item, item_type) + return wrapper diff --git a/newsroom/wire/block_media/filter_media.py b/newsroom/wire/block_media/filter_media.py new file mode 100644 index 000000000..2de6d29b5 --- /dev/null +++ b/newsroom/wire/block_media/filter_media.py @@ -0,0 +1,169 @@ +from functools import wraps +from flask import current_app as app +from newsroom.auth import get_user +from newsroom.wire.block_media.company_factory import CompanyFactory +from lxml import html as lxml_html +import re +import logging +from superdesk.etree import to_string +logger = logging.getLogger(__name__) + + +def filter_media(func): + @wraps(func) + def wrapper(*args, **kwargs): + if not app.config.get("EMBED_PRODUCT_FILTERING"): + return func(*args, **kwargs) + + item_arg = get_item_argument(args, kwargs) + if item_arg is None: + return func(*args, **kwargs) + + embedded_data = get_embedded_data() + if not any(embedded_data.values()): + return func(*args, **kwargs) + + item_arg = process_item_embeds(item_arg, embedded_data) + + return func(*args, **kwargs) + + return wrapper + + +def get_item_argument(args, kwargs): + if len(args) > 1 and isinstance(args[1], dict) and 'body_html' in args[1]: + return args[1] + + for arg in args: + if isinstance(arg, dict) and 'body_html' in arg: + return arg + + return kwargs.get('item') + + +def get_embedded_data(): + try: + user = get_user(required=True) + return CompanyFactory.get_embedded_data(user) + except Exception as e: + logger.error(f"Error in 
from embedded data: {str(e)}") + return {} + + +def process_item_embeds(item_arg, embedded_data): + html_updated = False + html_string = item_arg.get('body_html', '') + root_elem = lxml_html.fromstring(html_string) + + allowed_tags = get_allowed_tags(embedded_data) + + if allowed_tags: + html_updated = process_allowed_tags(root_elem, allowed_tags) + + if html_updated: + item_arg["body_html"] = to_string(root_elem, method="html") + + es_highlight = item_arg.get('es_highlight', {}) + es_highlight_body_html = es_highlight.get('body_html', []) + + if len(es_highlight_body_html) > 0: + es_highlight_html_string = es_highlight_body_html[0] + es_highlight_root_elem = lxml_html.fromstring(es_highlight_html_string) + + es_highlight_allowed_tags = allowed_tags + + if es_highlight_allowed_tags: + es_highlight_html_updated = process_allowed_tags(es_highlight_root_elem, es_highlight_allowed_tags) + + if es_highlight_html_updated: + item_arg['es_highlight']['body_html'][0] = to_string(es_highlight_root_elem, method="html") + + return item_arg + + +def get_allowed_tags(embedded_data): + tag_mapping = { + 'video': ('video_display', 'video_download'), + 'audio': ('audio_display', 'audio_download'), + 'img': ('images_display', 'images_download'), + 'all': ('all_display', 'all_download'), + 'social_media': ('social_media_display', 'social_media_download'), + 'sd': ('sdpermit_display', 'sdpermit_download'), + } + + allowed_tags = { + 'display_tags': [tag for tag, (display_key, _) in tag_mapping.items() if embedded_data.get(display_key, False)], + 'download_tags': [tag for tag, (_, download_key) in tag_mapping.items() + if embedded_data.get(download_key, False)], + } + + return allowed_tags + + +def process_allowed_tags(root_elem, allowed_tags): + html_updated = False + + display_social_tag = False + download_social_tag = False + + display_tags = allowed_tags['display_tags'] + + if 'all' in display_tags or (not any(display_tags)): + display_tags = ['video', 'audio', 'img', 
'social_media'] + display_social_tag = True + if 'social_media' in display_tags: + display_social_tag = True + + download_tags = allowed_tags['download_tags'] + if 'all' in download_tags or (not any(download_tags)): + download_tags = ['video', 'audio', 'img', 'social_media'] + download_social_tag = True + if 'social_media' in download_tags: + download_social_tag = True + + tag_map = {'video': 'Video', 'audio': 'Audio', 'img': 'Image'} + display_regex_parts = ['|'.join(tag_map[tag] for tag in tag_map if tag not in display_tags)] + + display_regex = rf" EMBED START (?:{'|'.join(display_regex_parts)}) {{id: \"editor_([0-9]+)" + download_regex_parts = ['|'.join(tag_map[tag] for tag in tag_map if tag not in download_tags)] + download_regex = rf" EMBED START (?:{'|'.join(download_regex_parts)}) {{id: \"editor_([0-9]+)" + + comments = root_elem.xpath('//comment()') + for comment in comments: + display_match = re.search(display_regex, comment.text) + download_match = re.search(download_regex, comment.text) + + if display_match and display_match.group(1): + figure = comment.getnext() + for elem in figure.iterchildren(): + if elem.tag not in display_tags: + figure.attrib['class'] = 'disabled-embed' + html_updated = True + break + + figure = comment.getnext() + if figure is None: + continue + if download_match and download_match.group(1): + for elem in figure.iterchildren(): + if elem.tag not in download_tags: + elem.attrib['data-disable-download'] = 'true' + html_updated = True + break + + if not display_social_tag: + social_media_embeds = root_elem.xpath('//div[@class="embed-block"]') + for social_media_embed in social_media_embeds: + social_media_embed.attrib['class'] = 'embed-block disabled-embed' + html_updated = True + + if not download_social_tag: + social_media_embeds = root_elem.xpath('//div[@class="embed-block"]') + for social_media_embed in social_media_embeds: + blockquote_elements = social_media_embed.xpath('.//blockquote') + for blockquote in 
blockquote_elements: + blockquote.attrib['data-disable-download'] = 'true' + html_updated = True + break + + return html_updated diff --git a/newsroom/wire/formatters/downloadninjs.py b/newsroom/wire/formatters/downloadninjs.py index 6d3b54fd7..793214111 100644 --- a/newsroom/wire/formatters/downloadninjs.py +++ b/newsroom/wire/formatters/downloadninjs.py @@ -13,21 +13,27 @@ def __init__(self): self.direct_copy_properties += ('associations',) def rewire_embeded_images(self, item): - def _get_source_ref(marker, item): widest = -1 src_rendition = "" - for rendition in item.get("associations").get(marker).get("renditions"): - width = item.get("associations").get(marker).get("renditions").get(rendition).get("width") - if width > widest: - widest = width - src_rendition = rendition + associations = item.get("associations") + if associations: + marker_association = associations.get(marker) + if marker_association: + renditions = marker_association.get("renditions") + if renditions: + for rendition in renditions: + width = renditions.get(rendition, {}).get("width") + if width and width > widest: + widest = width + src_rendition = rendition - if widest > 0: - return item.get("associations").get(marker).get("renditions").get(src_rendition).get("href").lstrip('/') + if widest > 0 and src_rendition: + href = associations.get(marker, {}).get("renditions", {}).get(src_rendition, {}).get("href") + if href: + return href.lstrip('/') - logger.warning( - "href not found for the original in NINJSDownload formatter") + logger.warning("href not found for the original in NINJSDownload formatter") return None def _get_source_set_refs(marker, item): @@ -38,13 +44,17 @@ def _get_source_set_refs(marker, item): :return: """ srcset = [] - for rendition in item.get("associations").get(marker).get("renditions"): - srcset.append( - item.get("associations").get(marker).get("renditions").get(rendition).get("href").lstrip('/') - + " " - + 
str(item.get("associations").get(marker).get("renditions").get(rendition).get("width")) - + "w" - ) + associations = item.get("associations") + if associations: + marker_association = associations.get(marker) + if marker_association: + renditions = marker_association.get("renditions") + if renditions: + for rendition in renditions: + href = renditions.get(rendition, {}).get("href") + width = renditions.get(rendition, {}).get("width") + if href and width: + srcset.append(href.lstrip('/') + " " + str(width) + "w") return ",".join(srcset) def update_image(item, elem, group): diff --git a/newsroom/wire/formatters/htmlwithmedia.py b/newsroom/wire/formatters/htmlwithmedia.py index 4d5be1d75..7a5a712b7 100644 --- a/newsroom/wire/formatters/htmlwithmedia.py +++ b/newsroom/wire/formatters/htmlwithmedia.py @@ -3,6 +3,7 @@ from .utils import remove_internal_renditions, log_media_downloads, remove_unpermissioned_embeds from newsroom.utils import update_embeds_in_body from ...upload import ASSETS_RESOURCE + import base64 diff --git a/newsroom/wire/formatters/utils.py b/newsroom/wire/formatters/utils.py index 2b5267201..6a4a7a84e 100644 --- a/newsroom/wire/formatters/utils.py +++ b/newsroom/wire/formatters/utils.py @@ -40,22 +40,39 @@ def remove_internal_renditions(item, remove_media=False): def add_media(zf, item): - """ - Add the media files associated with the item - :param zf: Zipfile - :param item: - :return: - """ added_files = [] - for _key, associated_item in item.get('associations', {}).items(): - for rendition in associated_item.get('renditions'): - name = associated_item.get('renditions').get(rendition).get('href').lstrip('/') + associations = item.get('associations', {}) + for associated_item in associations.values(): + if not associated_item: + continue + + renditions = associated_item.get('renditions') + if not renditions or not isinstance(renditions, dict): + continue + + for rendition_data in renditions.values(): + if not rendition_data: + continue + + name = 
rendition_data.get('href', '').lstrip('/') if name in added_files: continue - file = flask.current_app.media.get(associated_item.get('renditions').get(rendition).get('media'), - ASSETS_RESOURCE) - zf.writestr(name, file.read()) - added_files.append(name) + + media_id = rendition_data.get('media') + if not media_id: + flask.current_app.logger.warning(f"Media ID not found for rendition: {name}") + continue + + file = flask.current_app.media.get(media_id, ASSETS_RESOURCE) + if not file: + flask.current_app.logger.warning(f"File not found: {name}") + continue + + try: + zf.writestr(name, file.read()) + added_files.append(name) + except Exception as e: + flask.current_app.logger.error(f"Error adding file to zip: {name}. Error: {str(e)}") def rewire_featuremedia(item): diff --git a/newsroom/wire/search.py b/newsroom/wire/search.py index d587e0489..013bf2c4a 100644 --- a/newsroom/wire/search.py +++ b/newsroom/wire/search.py @@ -1,7 +1,6 @@ import logging from datetime import datetime, timedelta from copy import deepcopy - from eve.utils import ParsedRequest, config from flask import current_app as app, json, request from superdesk import get_resource_service @@ -19,8 +18,7 @@ from newsroom.auth import get_user from newsroom.companies import get_user_company from newsroom.products.products import get_products_by_company -from newsroom.user_roles import UserRole - +from newsroom.wire.block_media.filter_media import filter_media logger = logging.getLogger(__name__) @@ -54,9 +52,6 @@ class WireSearchResource(newsroom.Resource): item_methods = ['GET'] resource_methods = ['GET'] - allowed_roles = [role for role in UserRole] - allowed_item_roles = allowed_roles - def versioncreated_range(created): _range = {} @@ -189,7 +184,6 @@ def get_product_items(self, product_id, size): search.source['post_filter'] = {'bool': {'must': []}} internal_req = self.get_internal_request(search) docs = list(self.internal_get(internal_req, None)) - if app.config.get("EMBED_PRODUCT_FILTERING"): for 
item in docs: self.permission_embeds_in_item(item, self.get_permitted_products()) @@ -545,18 +539,12 @@ def get_permitted_products(self): get_products_by_company(company.get('_id'), None, request.args.get('type', 'wire')) if p.get('sd_product_id')] + @filter_media def permission_embeds_in_item(self, item, permitted_products): - """ - Given the permitted products for the current user and an item, mark any video or audio embedded elements - that are not associated with any products that the user is allowed. - :param item: - :param permitted_products: - :return: - """ disable_download = [] for key, embed_item in item.get("associations", {}).items(): - if key.startswith("editor_") and embed_item and (embed_item.get('type', '')) in ['audio', 'video']: - # get the list of products that the embedded item matched in Superdesk + if (key.startswith("editor_") and embed_item + and embed_item.get('type', '') in ['audio', 'video', 'picture']): embed_products = [p.get('code') for p in ((item.get('associations') or {}).get(key) or {}).get('products', [])] @@ -564,22 +552,24 @@ def permission_embeds_in_item(self, item, permitted_products): disable_download.append(key) if len(disable_download) == 0: + logger.info("No embedded items require download disabling.") return - # mark the each embed as allowed or not, except for images root_elem = lxml_html.fromstring(item.get('body_html', '')) - regex = r" EMBED START (?:Video|Audio) {id: \"editor_([0-9]+)" + regex = r" EMBED START (?:Video|Audio|Image) {id: \"editor_([0-9]+)" html_updated = False comments = root_elem.xpath('//comment()') for comment in comments: m = re.search(regex, comment.text) - # if we've found an Embed Start comment if m and m.group(1): figure = comment.getnext() for elem in figure.iterchildren(): - if elem.tag in ['video', 'audio']: + if elem.tag in ['video', 'audio', 'img']: if "editor_" + m.group(1) in disable_download: - elem.attrib['data-disable-download'] = 'true' + if 'data-disable-download' not in 
elem.attrib or \ + elem.attrib['data-disable-download'] != 'true': + elem.attrib['data-disable-download'] = 'true' + html_updated = True if elem.text and ' EMBED END ' in elem.text: break html_updated = True diff --git a/newsroom/wire/utils.py b/newsroom/wire/utils.py index 86a289cff..74c3d6c34 100644 --- a/newsroom/wire/utils.py +++ b/newsroom/wire/utils.py @@ -9,9 +9,9 @@ def get_picture(item): def get_body_picture(item): - pictures = [assoc for assoc in item.get('associations', {}).values() if assoc.get('type') == 'picture'] - if pictures: - return pictures[0] + pictures = [assoc for assoc in item.get('associations', {}).values() + if assoc is not None and assoc.get('type') == 'picture'] + return pictures[0] if pictures else None def get_caption(picture): diff --git a/newsroom/wire/views.py b/newsroom/wire/views.py index 32801e3d2..456ac1eed 100644 --- a/newsroom/wire/views.py +++ b/newsroom/wire/views.py @@ -4,7 +4,6 @@ import superdesk import json from html import escape - from bson import ObjectId from operator import itemgetter from flask import current_app as app, request, jsonify, url_for @@ -16,7 +15,6 @@ from .formatters.utils import add_media from superdesk import get_resource_service - from newsroom.navigations.navigations import get_navigations_by_company from newsroom.products.products import get_products_by_company from newsroom.wire import blueprint @@ -27,13 +25,14 @@ from newsroom.email import send_email from newsroom.companies import get_user_company from newsroom.utils import get_entity_or_404, get_json_or_400, parse_dates, get_type, is_json_request, query_resource, \ - get_agenda_dates, get_location_string, get_public_contacts, get_links, get_items_for_user_action + get_agenda_dates, get_location_string, get_public_contacts, get_links, get_items_for_user_action, \ + get_entities_elastic_or_mongo_or_404 from newsroom.notifications import push_user_notification, push_notification from newsroom.companies import section from 
newsroom.template_filters import is_admin_or_internal - from .search import get_bookmarks_count from ..upload import ASSETS_RESOURCE +from newsroom.wire.block_media.download_items import filter_items_download, block_items_by_embedded_data HOME_ITEMS_CACHE_KEY = 'home_items' HOME_EXTERNAL_ITEMS_CACHE_KEY = 'home_external_items' @@ -107,7 +106,7 @@ def get_items_by_card(cards): # using '/media_card_external' endpoint items_by_card[card['label']] = None - app.cache.set(HOME_ITEMS_CACHE_KEY, items_by_card, timeout=300) + app.cache.set(HOME_ITEMS_CACHE_KEY, items_by_card, timeout=1) return items_by_card @@ -142,6 +141,22 @@ def get_previous_versions(item): return [] +@filter_items_download +def get_items_for_user_action_block(_ids, item_type): + # Getting entities from elastic first so that we get all fields + # even those which are not a part of ItemsResource(content_api) schema. + items = get_entities_elastic_or_mongo_or_404(_ids, item_type) + + if not items or items[0].get('type') != 'text': + return items + + for item in items: + if item.get('slugline') and item.get('anpa_take_key'): + item['slugline'] = '{0} | {1}'.format(item['slugline'], item['anpa_take_key']) + + return items + + @blueprint.route('/') @login_required def index(): @@ -190,8 +205,7 @@ def download(_ids): user = get_user(required=True) _format = flask.request.args.get('format', 'text') item_type = get_type() - items = get_items_for_user_action(_ids.split(','), item_type) - + items = get_items_for_user_action_block(_ids.split(','), item_type, filter_func=block_items_by_embedded_data) _file = io.BytesIO() formatter = app.download_formatters[_format]['formatter'] mimetype = None @@ -224,7 +238,8 @@ def download(_ids): for item in items: formated_item = json.loads(formatter.format_item(item, item_type=item_type)) add_media(zf, item) - zf.writestr(secure_filename(formatter.format_filename(item)), json.dumps(formated_item).encode('utf-8')) + zf.writestr(secure_filename(formatter.format_filename(item)), + 
json.dumps(formated_item).encode('utf-8')) _file.seek(0) elif _format == 'htmlpackage': with zipfile.ZipFile(_file, mode='w') as zf: diff --git a/package-lock.json b/package-lock.json index 5d789c40c..3cc8044c0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2447,6 +2447,11 @@ "domelementtype": "1" } }, + "dompurify": { + "version": "3.1.6", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.6.tgz", + "integrity": "sha512-cTOAhc36AalkjtBpfG6O8JimdTMWNXjiePT2xQH/ppBGi/4uIpmj8eKyIkMJErXWARyINV/sB38yf8JCLF5pbQ==" + }, "domutils": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", @@ -3554,6 +3559,52 @@ "object-assign": "^4.0.1" } }, + "file-loader": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/file-loader/-/file-loader-1.1.11.tgz", + "integrity": "sha512-TGR4HU7HUsGg6GCOPJnFk06RhWgEWFLAGWiT6rcD+GRC2keU3s9RGJ+b3Z6/U73jwwNb2gKLJ7YCrp+jvU4ALg==", + "dev": true, + "requires": { + "loader-utils": "^1.0.2", + "schema-utils": "^0.4.5" + }, + "dependencies": { + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + 
"schema-utils": { + "version": "0.4.7", + "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz", + "integrity": "sha512-v/iwU6wvwGK8HbU9yi3/nhGzP0yGSuhQMzL6ySiec1FSrZZDkhm4noOSWzrNFo/jEc+SJY6jRTwuwbSXJPDUnQ==", + "dev": true, + "requires": { + "ajv": "^6.1.0", + "ajv-keywords": "^3.1.0" + } + } + } + }, "file-uri-to-path": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", diff --git a/package.json b/package.json index 258a7d15e..02820f73e 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "bootstrap": "4.1.3", "classnames": "^2.2.5", "css-loader": "^0.28.5", + "dompurify": "^3.1.6", "enzyme-adapter-react-16": "1.7.1", "extract-text-webpack-plugin": "3.0.2", "fetch-mock": "^5.12.2", @@ -55,6 +56,7 @@ "eslint": "^4.8.0", "eslint-plugin-react": "^7.3.0", "expect": "^21.1.0", + "file-loader": "^1.1.11", "karma": "^1.7.1", "karma-chrome-launcher": "^2.2.0", "karma-jasmine": "^1.1.0", diff --git a/tests/dev-requirements.txt b/tests/dev-requirements.txt new file mode 100644 index 000000000..d3ccd8786 --- /dev/null +++ b/tests/dev-requirements.txt @@ -0,0 +1,11 @@ +-r requirements.txt + +flake8 +sphinx +sphinx-autobuild +pytest==8.1.1 +pytest-cov==8.1.1 +pytest-mock==3.14.0 +responses>=0.10.6,<0.11 +wooper==0.4.4 +httmock==1.4.0 diff --git a/tests/fixtures.py b/tests/fixtures.py index 63b19be8f..8186a7765 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -3,10 +3,11 @@ from pytest import fixture from datetime import datetime, timedelta from superdesk.utc import utcnow -from tests.test_users import test_login_succeeds_for_admin, init as users_init +from flask import url_for PUBLIC_USER_ID = ObjectId('59b4c5c61d41c8d736852fbf') TEST_USER_ID = ObjectId('5cc94454bc43165c045ffec9') +ADMIN_USER_ID = '5cc94b99bc4316684dc7dc07' items = [ { @@ -216,3 +217,28 @@ def setup_user_company(app): @fixture(autouse=True) def init_company(app): setup_user_company(app) + + +def 
test_login_succeeds_for_admin(client): + response = client.post( + url_for('auth.login'), + data={'email': 'admin@sourcefabric.org', 'password': 'admin'}, + follow_redirects=True + ) + assert response.status_code == 200 + + +def users_init(app): + + app.data.insert('users', [{ + '_id': ObjectId(ADMIN_USER_ID), + 'first_name': 'admin', + 'last_name': 'admin', + 'email': 'admin@sourcefabric.org', + 'password': '$2b$12$HGyWCf9VNfnVAwc2wQxQW.Op3Ejk7KIGE6urUXugpI0KQuuK6RWIG', + 'user_type': 'administrator', + 'is_validated': True, + 'is_enabled': True, + 'is_approved': True, + 'receive_email': True, + }]) diff --git a/tests/test_block_content.py b/tests/test_block_content.py new file mode 100644 index 000000000..b5cc74ea4 --- /dev/null +++ b/tests/test_block_content.py @@ -0,0 +1,464 @@ +import io +import json +import zipfile +from datetime import timedelta, datetime + +import re +import bson +import lxml.etree +from superdesk.utc import utcnow + +from .fixtures import items, init_items, init_auth, agenda_items, init_agenda_items # noqa +from .test_push import upload_binary +import pytest + +items_ids = [item['_id'] for item in items[:2]] +item = items[:2][0] + + +def download_zip_file(client, _format, section): + resp = client.get(f'/download/{",".join(items_ids)}?format={_format}&type={section}', follow_redirects=True) + assert resp.status_code == 200 + assert resp.mimetype == 'application/zip' + + content_disposition = resp.headers.get('Content-Disposition') + assert content_disposition is not None, "Content-Disposition header is missing" + + filename_match = re.search(r'filename=(\d{12})-newsroom\.zip', content_disposition) + assert filename_match, f"Filename in Content-Disposition does not match expected pattern: {content_disposition}" + + filename_timestamp = filename_match.group(1) + file_datetime = datetime.strptime(filename_timestamp, "%Y%m%d%H%M") + + now = datetime.utcnow() + assert now - timedelta( + minutes=5) <= file_datetime <= now, f"Filename timestamp 
{filename_timestamp} is not within the expected range" + + return io.BytesIO(resp.get_data()) + + +def text_content_test(content): + content = content.decode('utf-8').split('\n') + assert 'AMAZON-BOOKSTORE-OPENING' in content[0] + assert 'Amazon Is Opening More Bookstores' in content[1] + assert '

' not in content + assert 'Block 4' == content[-2] + + +def nitf_content_test(content): + root = lxml.etree.fromstring(content) + assert 'nitf' == root.tag + head = root.find('head') + assert items[0]['headline'] == head.find('title').text + + +def ninjs_content_test(content): + data = json.loads(content) + assert data.get('associations', {}).get('editor_1') + assert not data.get('associations', {}).get('editor_0') + assert not data.get('associations', {}).get('editor_2') + assert data['headline'] == 'Amazon Is Opening More Bookstores' + assert 'editor_1' in data['body_html'] + assert 'editor_0' not in data['body_html'] + + +def ninjs_block_download_example(content): + data = json.loads(content) + assert data.get('associations', {}).get('editor_1') + assert not data.get('associations', {}).get('editor_0') + assert not data.get('associations', {}).get('editor_2') + assert data['headline'] == 'Amazon Is Opening More Bookstores' + assert 'video' in data['body_html'] + assert 'img' not in data['body_html'] + assert 'blockquote' not in data['body_html'] + assert 'audio' not in data['body_html'] + + +def newsmlg2_content_test(content): + root = lxml.etree.fromstring(content) + assert 'newsMessage' in root.tag + + +def filename(name, item): + return f'{item["versioncreated"].strftime("%Y%m%d%H%M")}-{name}' + + +wire_formats = [ + { + 'format': 'text', + 'mimetype': 'text/plain', + 'filename': filename('amazon-bookstore-opening.txt', item), + 'test_content': text_content_test, + }, + { + 'format': 'nitf', + 'mimetype': 'application/xml', + 'filename': filename('amazon-bookstore-opening.xml', item), + 'test_content': nitf_content_test, + }, + { + 'format': 'newsmlg2', + 'mimetype': 'application/vnd.iptc.g2.newsitem+xml', + 'filename': filename('amazon-bookstore-opening.xml', item), + 'test_content': newsmlg2_content_test, + }, + { + 'format': 'picture', + 'mimetype': 'image/jpeg', + 'filename': 'baseimage.jpg', + }, +] + + +def setup_block_embeds(client, app): + media_id = 
bson.ObjectId() + associations = { + 'featuremedia': { + 'mimetype': 'image/jpeg', + 'type': 'picture', + 'renditions': { + 'baseImage': { + 'mimetype': 'image/jpeg', + 'media': str(media_id), + 'href': 'http://a.b.c/xxx.jpg', + }, + '16-9': { + 'mimetype': 'image/jpeg', + 'href': 'http://a.b.c/xxx.jpg', + 'media': str(media_id), + 'width': 1280, + 'height': 720, + }, + '4-3': { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + 'media': str(media_id), + "mimetype": "image/jpeg", + }, + }, + }, + "editor_1": { + "type": "video", + "renditions": { + "original": { + "mimetype": "video/mp4", + "href": "/assets/640ff0bdfb5122dcf06a6fc3", + 'media': str(media_id), + }, + }, + "mimetype": "video/mp4", + "products": [ + {"code": "123", "name": "Product A"}, + {"code": "321", "name": "Product B"}, + ], + }, + "editor_0": { + "type": "audio", + "renditions": { + "original": { + "mimetype": "audio/mp3", + "href": "/assets/640feb9bfb5122dcf06a6f7c", + "media": "640feb9bfb5122dcf06a6f7c", + }, + }, + "mimetype": "audio/mp3", + "products": [{"code": "999", "name": "NSW News"}], + }, + "editor_2": { + "type": "picture", + "renditions": { + "4-3": { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + "mimetype": "image/jpeg", + "media": "633d11b9fb5122dcf06a6f02", + }, + "16-9": { + "href": "/assets/633d0f59fb5122dcf06a6ee8", + "width": 1280, + "height": 720, + "mimetype": "image/jpeg", + "media": "633d0f59fb5122dcf06a6ee8", + "poi": {}, + }, + }, + "products": [{"code": "888"}], + }, + "editor_3": None, + } + upload_binary('picture.jpg', client, media_id=str(media_id)) + + app.data.update('items', item['_id'], { + 'associations': associations, + 'body_html': ( + '

Block 1

' + '' + '
' + '' + '
minns
' + '
' + '' + '


' + '

Block 2

' + '' + '
' + '' + '
Scomo whinging
' + '
' + '' + '


Block 3

' + '' + '
' + '' + '
Prime Minister Scott Morrison and Liberal member for Higgins Katie Allen
' + '
' + '' + '

Block 4

' + '
' + '' + '' + '
' + ) + }, item) + + +def test_download_single_block(client, app): + setup_block_embeds(client, app) + for _format in wire_formats: + resp = client.get(f'/download/{item["_id"]}?format={_format["format"]}', follow_redirects=True) + assert resp.status_code == 200 + assert resp.mimetype == _format['mimetype'] + assert (resp.headers.get('Content-Disposition') in + [f'attachment; filename={_format["filename"]}', f'attachment; filename="{_format["filename"]}"']) + + +@pytest.fixture +def setup_data(client, app): + setup_block_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '3', + 'name': 'Block Conent.', + 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "sdpermit_display": True, + "video_display": False, + "audio_display": True, + "images_display": True, + "all_display": False, + "social_media_download": True, + "video_download": True, + "audio_download": False, + "images_download": True, + "all_download": False, + "sdpermit_download": True + } + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '3'}, user) + app.data.insert('products', [{ + '_id': 13, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['3'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + + +def start_test_block_download(client, app, setup_data): + for _format in wire_formats: + _file = download_zip_file(client, _format['format'], 'wire') + with zipfile.ZipFile(_file) as zf: + assert _format['filename'] in zf.namelist() + content = zf.open(_format['filename']).read() + if _format.get('test_content'): + _format['test_content'](content) + + +def assert_history(app): + history = app.data.find('history', None, None) + assert (len(wire_formats) * len(items_ids)) == history.count() + assert 'download' == history[0]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + 
timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '3' + assert history[0].get('section') == 'wire' + + +def test_block_download_with_config(client, app, setup_data): + start_test_block_download(client, app, setup_data) + assert_history(app) + + +def test_ninjs_download(client, app): + setup_block_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '1', + 'name': 'Press co.', + 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": True, + "video_download": True, + "audio_download": True, + "images_download": True, + "all_download": True, + "sdpermit_display": True, + "sdpermit_download": True + } + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '1'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['1'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + ninjs_content_test(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '1' + assert 
history[0].get('section') == 'wire' + + +def test_ninjs_block_download_default(client, app): + setup_block_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '1', + 'name': 'Press co.', + 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": False, + "video_download": False, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": False + } + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '1'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['1'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + ninjs_content_test(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '1' + assert history[0].get('section') == 'wire' + + +def test_ninjs_block_download_example(client, app): + setup_block_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '2', + 'name': 'Press01 co.', + 'is_enabled': True, + 'embedded': { + 
"social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": False, + "video_download": True, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": False + } + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '2'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['2'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + ninjs_block_download_example(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '2' + assert history[0].get('section') == 'wire' diff --git a/tests/test_block_media.py b/tests/test_block_media.py new file mode 100644 index 000000000..f98d32fd9 --- /dev/null +++ b/tests/test_block_media.py @@ -0,0 +1,589 @@ +import io +import json +import zipfile +from datetime import timedelta +import re +import bson +import lxml.etree +import pytest +from superdesk.utc import utcnow + +from .fixtures import items, init_items, init_auth, agenda_items, init_agenda_items # noqa +from .test_push import upload_binary + +items_ids = [item['_id'] for item in items[:2]] +item = 
items[:2][0] + + +@pytest.fixture +def setup_block_embeds(client, app): + media_id = bson.ObjectId() + associations = { + 'featuremedia': { + 'mimetype': 'image/jpeg', + 'type': 'picture', + 'renditions': { + 'baseImage': { + 'mimetype': 'image/jpeg', + 'media': str(media_id), + 'href': 'http://a.b.c/xxx.jpg', + }, + '16-9': { + 'mimetype': 'image/jpeg', + 'href': 'http://a.b.c/xxx.jpg', + 'media': str(media_id), + 'width': 1280, + 'height': 720, + }, + '4-3': { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + 'media': str(media_id), + "mimetype": "image/jpeg", + }, + }, + }, + "editor_1": { + "type": "video", + "renditions": { + "original": { + "mimetype": "video/mp4", + "href": "/assets/640ff0bdfb5122dcf06a6fc3", + 'media': str(media_id), + }, + }, + "mimetype": "video/mp4", + # for base permission check,pass + "products": [ + {"code": "123", "name": "Product A"}, + {"code": "321", "name": "Product B"}, + ], + }, + "editor_0": { + "type": "audio", + "renditions": { + "original": { + "mimetype": "audio/mp3", + "href": "/assets/640feb9bfb5122dcf06a6f7c", + "media": "640feb9bfb5122dcf06a6f7c", + }, + }, + "mimetype": "audio/mp3", + # for base permission check ,disable + "products": [{"code": "999", "name": "NSW News"}], + }, + "editor_2": { + "type": "picture", + "renditions": { + "4-3": { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + "mimetype": "image/jpeg", + "media": "633d11b9fb5122dcf06a6f02", + }, + "16-9": { + "href": "/assets/633d0f59fb5122dcf06a6ee8", + "width": 1280, + "height": 720, + "mimetype": "image/jpeg", + "media": "633d0f59fb5122dcf06a6ee8", + "poi": {}, + }, + }, + # for base permission check, pass + "products": [{"code": "123"}], + }, + "editor_3": None, + } + upload_binary('picture.jpg', client, media_id=str(media_id)) + + app.data.update('items', item['_id'], { + 'associations': associations, + 'body_html': ( + '

Block 1

' + '' + '
' + '' + '
minns
' + '
' + '' + '


' + '

Block 2

' + '' + '
' + '' + '
Scomo whinging
' + '
' + '' + '


Block 3

' + '' + '
' + '' + '
Prime Minister Scott Morrison and Liberal member for Higgins Katie Allen
' + '
' + '' + '

Block 4

' + '
' + '' + '' + '
' + ) + }, item) + + +@pytest.fixture +def configure_app(app): + app.config['EMBED_PRODUCT_FILTERING'] = True + return app + + +def setup_company_data(app, company_id, company_name, embedded): + app.data.insert('companies', [{ + '_id': company_id, + 'name': company_name, + 'is_enabled': True, + 'embedded': embedded + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': company_id}, user) + app.data.insert('products', [{ + '_id': int(company_id) * 10, + 'name': 'product test', + # base product check + 'sd_product_id': '123', + 'companies': [company_id], + 'is_enabled': True, + 'product_type': 'wire' + }]) + + +@pytest.fixture(params=[ + ('3', 'Block Conent.', { + "social_media_display": True, "sdpermit_display": True, "video_display": False, + "audio_display": True, "images_display": True, "all_display": False, + "social_media_download": True, "video_download": True, "audio_download": False, + "images_download": True, "all_download": False, "sdpermit_download": True + }), + ('2', 'Press01 co.', { + "social_media_display": True, "video_display": True, "audio_display": True, + "images_display": True, "all_display": True, "social_media_download": False, + "video_download": True, "audio_download": False, "images_download": False, + "all_download": False, "sdpermit_display": True, "sdpermit_download": False + }), + ('1', 'Press co.', { + "social_media_display": True, "video_display": True, "audio_display": True, + "images_display": True, "all_display": True, "social_media_download": False, + "video_download": False, "audio_download": False, "images_download": False, + "all_download": False, "sdpermit_display": True, "sdpermit_download": False + }) +]) +def company_data(request): + return request.param + + +@pytest.fixture +def setup_data(client, app, configure_app, setup_block_embeds, company_data): + company_id, company_name, embedded = company_data + + app.data.insert('companies', [{ + 
'_id': company_id, + 'name': company_name, + 'is_enabled': True, + 'embedded': embedded + }]) + + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': company_id}, user) + + app.data.insert('products', [{ + '_id': int(company_id * 10), + 'name': 'product test', + 'sd_product_id': '123', + 'companies': [company_id], + 'is_enabled': True, + 'product_type': 'wire' + }]) + return app, company_id + + +def download_zip_file(client, _format, section): + resp = client.get('/download/{0}?format={1}&type={2}'.format(','.join(items_ids), _format, section), + follow_redirects=True) + assert resp.status_code == 200 + assert resp.mimetype == 'application/zip' + assert resp.headers.get('Content-Disposition') == ( + 'attachment; filename={0}-newsroom.zip'.format(utcnow().strftime("%Y%m%d%H%M")) + ) + return io.BytesIO(resp.get_data()) + + +def text_content_test(content): + content = content.decode('utf-8').split('\n') + assert 'AMAZON-BOOKSTORE-OPENING' in content[0] + assert 'Amazon Is Opening More Bookstores' in content[1] + assert '

' not in content + + +def nitf_content_test(content): + root = lxml.etree.fromstring(content) + assert 'nitf' == root.tag + head = root.find('head') + assert items[0]['headline'] == head.find('title').text + + +def ninjs_content_test(content): + data = json.loads(content) + assert data.get('associations', {}).get('editor_1') + assert not data.get('associations', {}).get('editor_0') + assert not data.get('associations', {}).get('editor_2') + assert data['headline'] == 'Amazon Is Opening More Bookstores' + assert 'editor_1' in data['body_html'] + assert 'editor_0' not in data['body_html'] + + +def ninjs_block_download_video(content): + data = json.loads(content) + assert data.get('associations', {}).get('editor_1') + assert not data.get('associations', {}).get('editor_0') + assert not data.get('associations', {}).get('editor_2') + assert data['headline'] == 'Amazon Is Opening More Bookstores' + assert 'video' in data['body_html'] + assert 'img' not in data['body_html'] + assert 'blockquote' not in data['body_html'] + assert 'audio' not in data['body_html'] + + +def ninjs_block_download_audio_image(content): + data = json.loads(content) + assert not data.get('associations', {}).get('editor_1') + assert not data.get('associations', {}).get('editor_0') + assert data.get('associations', {}).get('editor_2') + assert data['headline'] == 'Amazon Is Opening More Bookstores' + assert 'video' not in data['body_html'] + assert 'img' in data['body_html'] + assert 'blockquote' not in data['body_html'] + assert 'audio' not in data['body_html'] + + +def htmlpackage_block_download_video(content): + data = json.loads(content) + assert data.get('associations', {}).get('editor_1') + assert not data.get('associations', {}).get('editor_0') + assert not data.get('associations', {}).get('editor_2') + assert data['headline'] == 'Amazon Is Opening More Bookstores' + assert 'video' in data['body_html'] + assert 'img' not in data['body_html'] + assert 'blockquote' not in data['body_html'] + 
assert 'audio' not in data['body_html'] + + +def htmlpackage_block_download_audio_image(html_content_file): + html_content = html_content_file.decode('utf-8') + assert re.search(r'

Amazon Is Opening More Bookstores

', html_content) + assert not re.search(r'= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == company_id + assert history[0].get('section') == 'wire' + + +COMPANY_DATA = [ + ( + '11', + 'AAP01', + { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": False, + "video_download": True, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": False + }, + ninjs_block_download_video + ), + ( + '12', + 'AAP02', + { + "social_media_display": False, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": False, + "social_media_download": False, + "video_download": False, + "audio_download": True, + "images_download": True, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": True + }, + ninjs_block_download_audio_image, + ), +] + + +@pytest.mark.parametrize('company_data', COMPANY_DATA) +def test_ninjs_download(client, app, configure_app, setup_block_embeds, company_data): + company_id, company_name, embedded, expected_content_test = company_data + + company = app.data.find_one('companies', req=None, _id=company_id) + if company: + app.data.update('companies', company_id, { + 'name': company_name, + 'is_enabled': True, + 'embedded': embedded + }, company) + else: + app.data.insert('companies', [{ + '_id': company_id, + 'name': company_name, + 'is_enabled': True, + 'embedded': embedded + }]) + + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': company_id}, user) + + app.data.insert('products', [{ + '_id': int(company_id), + 'name': 'product test', + 'sd_product_id': '123', + 'companies': [company_id], + 'is_enabled': True, + 'product_type': 'wire' + }]) + + 
app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + expected_content_test(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == company_id + assert history[0].get('section') == 'wire' + + +COMPANY_DATA_HTML = [ + ( + '13', + 'AAP03', + { + "social_media_display": False, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": False, + "social_media_download": False, + "video_download": False, + "audio_download": True, + "images_download": True, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": True + }, + htmlpackage_block_download_audio_image, + ), +] + + +@pytest.mark.parametrize('company_data_demo', COMPANY_DATA_HTML) +def test_htmlpackage_download(client, app, configure_app, setup_block_embeds, company_data_demo): + def extract_nr_timestamps(filenames): + timestamps = [] + for filename in filenames: + match = re.match(r'(\d{12})-', filename) + if match: + timestamps.append(match.group(1)) + return sorted(timestamps, reverse=True) + + company_id, company_name, embedded, expected_content_test = company_data_demo + + company = app.data.find_one('companies', req=None, _id=company_id) + if company: + app.data.update('companies', company_id, { + 'name': company_name, + 'is_enabled': True, + 'embedded': embedded + }, company) + else: + app.data.insert('companies', [{ + '_id': company_id, + 
'name': company_name, + 'is_enabled': True, + 'embedded': embedded + }]) + + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': company_id}, user) + + app.data.insert('products', [{ + '_id': int(company_id), + 'name': 'product test', + 'sd_product_id': '123', + 'companies': [company_id], + 'is_enabled': True, + 'product_type': 'wire' + }]) + + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + + _file = download_zip_file(client, 'htmlpackage', 'wire') + with zipfile.ZipFile(_file) as zf: + filenames = [info.filename for info in zf.filelist] + content = zf.open(filename('amazon-bookstore-opening.html', item)).read() + expected_content_test(content) + + timestamps = extract_nr_timestamps(filenames) + + if len(timestamps) >= 2: + current_datetime = timestamps[0] + previous_datetime = timestamps[-1] + + expected_files = [ + f'{current_datetime}-amazon-bookstore-opening.html', + f'{previous_datetime}-weather.html', + 'assets/633d11b9fb5122dcf06a6f02' + ] + + missing_files = [file for file in expected_files if file not in filenames] + + if not missing_files: + print("All files found, Test Pass.") + else: + raise AssertionError( + f"The following expected files were not found in the ZIP file list: {', '.join(missing_files)}") + else: + raise AssertionError("Not enough timestamped files found in the ZIP archive") + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == company_id + assert history[0].get('section') == 'wire' diff --git a/tests/test_companies.py b/tests/test_companies.py index b85a463e6..d32e628ac 100644 --- 
a/tests/test_companies.py +++ b/tests/test_companies.py @@ -85,7 +85,17 @@ def test_save_company_permissions(client, app): test_login_succeeds_for_admin(client) data = json.dumps({'products': {'p-2': True}, 'sections': {'wire': True}, 'archive_access': True}) - client.post('companies/c-1/permissions', data=data, content_type='application/json') + response = client.get('/companies/get-csrf-token') + data_token = json.loads(response.get_data()) + csrf_token = data_token['csrf_token'] + headers = { + 'Content-Type': 'application/json', + 'X-CSRF-Token': csrf_token + } + response = client.post('companies/c-1/permissions', data=data, headers=headers, content_type='application/json') + + assert response.status_code == 200 + assert json.loads(response.get_data())['message'] == 'Permissions updated successfully' response = client.get('/products') data = json.loads(response.get_data()) diff --git a/tests/test_download.py b/tests/test_download.py index 01f4f3547..25db346d3 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -52,6 +52,18 @@ def ninjs_content_test(content): assert 'editor_0' not in data.get('body_html') +def ninjs_block_download_example(content): + data = json.loads(content.decode('utf-8')) + assert data.get('associations').get('editor_1') + assert not data.get('associations').get('editor_0') + assert not data.get('associations').get('editor_2') + assert data.get('headline') == 'Amazon Is Opening More Bookstores' + assert 'video' in data.get('body_html') + assert 'img' not in data.get('body_html') + assert 'blockquote' not in data.get('body_html') + assert 'audio' not in data.get('body_html') + + def newsmlg2_content_test(content): tree = lxml.etree.parse(io.BytesIO(content)) root = tree.getroot() @@ -254,6 +266,128 @@ def setup_embeds(client, app): '

Par 4

'}, item) +def setup_block_embeds(client, app): + media_id = str(bson.ObjectId()) + upload_binary('picture.jpg', client, media_id=media_id) + associations = { + 'featuremedia': { + 'mimetype': 'image/jpeg', + 'type': 'picture', + 'renditions': { + '16-9': { + 'mimetype': 'image/jpeg', + 'href': 'http://a.b.c/xxx.jpg', + 'media': media_id, + 'width': 1280, + 'height': 720, + }, + '4-3': { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + 'media': media_id, + "mimetype": "image/jpeg", + } + } + }, + "editor_1": { + "type": "video", + "renditions": { + "original": { + "mimetype": "video/mp4", + "href": "/assets/640ff0bdfb5122dcf06a6fc3", + 'media': media_id, + } + }, + "mimetype": "video/mp4", + "products": [{"code": "123", "name": "Product A"}, {"code": "321", "name": "Product B"}] + }, + "editor_0": { + "type": "audio", + "renditions": { + "original": { + "mimetype": "audio/mp3", + "href": "/assets/640feb9bfb5122dcf06a6f7c", + "media": "640feb9bfb5122dcf06a6f7c" + } + }, + "mimetype": "audio/mp3", + "products": [ + { + "code": "999", + "name": "NSW News" + } + ] + }, + "editor_2": { + "type": "picture", + "renditions": { + "4-3": { + "href": "/assets/633d11b9fb5122dcf06a6f02", + "width": 800, + "height": 600, + "mimetype": "image/jpeg", + "media": "633d11b9fb5122dcf06a6f02", + }, + "16-9": { + "href": "/assets/633d0f59fb5122dcf06a6ee8", + "width": 1280, + "height": 720, + "mimetype": "image/jpeg", + "media": "633d0f59fb5122dcf06a6ee8", + "poi": { + } + } + }, + "products": [{"code": "888"}] + }, + "editor_3": None + } + app.data.update('items', item['_id'], { + 'associations': associations, + 'body_html': '

Par 1

' + '' + '
' + '' + '
minns
' + '
' + '' + '


' + '

Par 2

' + '' + '
' + '' + '
Scomo whinging
' + '
' + '' + '


Par 3

' + '' + '
' + '' + '
Prime Minister Scott Morrison and Liberal member for ' + 'Higgins Katie Allen
' + '
' + '' + '

Par 4

' + '
' + '' + '' + '
' + }, item) + + def test_download_single(client, app): setup_image(client, app) for _format in wire_formats: @@ -266,6 +400,37 @@ def test_download_single(client, app): def test_wire_download(client, app): setup_image(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '1', + 'name': 'Press co.', + 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": True, + "video_download": True, + "audio_download": True, + "images_download": True, + "all_download": True, + "sdpermit_display": True, + "sdpermit_download": True + } + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '1'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['1'], + 'is_enabled': True, + 'product_type': 'wire' + }]) for _format in wire_formats: _file = download_zip_file(client, _format['format'], 'wire') with zipfile.ZipFile(_file) as zf: @@ -280,7 +445,7 @@ def test_wire_download(client, app): assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() assert history[0].get('item') in items_ids assert history[0].get('version') - assert history[0].get('company') is None + assert history[0].get('company') == '1' assert history[0].get('section') == 'wire' @@ -291,6 +456,73 @@ def test_ninjs_download(client, app): '_id': '1', 'name': 'Press co.', 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": True, + "video_download": True, + "audio_download": True, + "images_download": True, + "all_download": True, + "sdpermit_display": True, + "sdpermit_download": True + } + }]) + user = app.data.find_one('users', 
req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '1'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['1'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + ninjs_content_test(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '1' + assert history[0].get('section') == 'wire' + + +def test_ninjs_block_download_default(client, app): + setup_block_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + app.data.insert('companies', [{ + '_id': '1', + 'name': 'Press co.', + 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": False, + "video_download": False, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": False + } }]) user = app.data.find_one('users', req=None, first_name='admin') assert user @@ -323,6 +555,59 @@ def test_ninjs_download(client, app): assert history[0].get('section') == 'wire' +def test_ninjs_block_download_example(client, app): + setup_block_embeds(client, app) + app.config['EMBED_PRODUCT_FILTERING'] = True + 
app.data.insert('companies', [{ + '_id': '2', + 'name': 'Press01 co.', + 'is_enabled': True, + 'embedded': { + "social_media_display": True, + "video_display": True, + "audio_display": True, + "images_display": True, + "all_display": True, + "social_media_download": False, + "video_download": True, + "audio_download": False, + "images_download": False, + "all_download": False, + "sdpermit_display": True, + "sdpermit_download": False + } + }]) + user = app.data.find_one('users', req=None, first_name='admin') + assert user + app.data.update('users', user['_id'], {'company': '2'}, user) + app.data.insert('products', [{ + '_id': 10, + 'name': 'product test', + 'sd_product_id': '123', + 'companies': ['2'], + 'is_enabled': True, + 'product_type': 'wire' + }]) + app.general_setting('news_api_allowed_renditions', 'Foo', default='16-9,4-3') + + _file = download_zip_file(client, 'downloadninjs', 'wire') + with zipfile.ZipFile(_file) as zf: + assert filename('amazon-bookstore-opening.json', item) in zf.namelist() + content = zf.open(filename('amazon-bookstore-opening.json', item)).read() + ninjs_block_download_example(content) + + history = app.data.find('history', None, None) + assert 4 == history.count() + assert 'download' in history[0]['action'] + assert 'download' in history[1]['action'] + assert history[0].get('user') + assert history[0].get('versioncreated') + timedelta(seconds=2) >= utcnow() + assert history[0].get('item') in items_ids + assert history[0].get('version') + assert history[0].get('company') == '2' + assert history[0].get('section') == 'wire' + + def test_agenda_download(client, app): setup_image(client, app) for _format in agenda_formats: diff --git a/tests/test_wire.py b/tests/test_wire.py index 697776646..a89d094db 100644 --- a/tests/test_wire.py +++ b/tests/test_wire.py @@ -11,6 +11,7 @@ from tests.test_users import ADMIN_USER_ID from tests.test_download import setup_embeds from superdesk import get_resource_service +from unittest.mock import patch 
def test_item_detail(client): @@ -62,9 +63,9 @@ def test_share_items(client, app): assert str(user_id) in data['shares'] -def get_bookmarks_count(client, user): - resp = client.get('/api/wire_search?bookmarks=%s' % str(user)) - assert resp.status_code == 200 +def get_bookmarks_count(client, user_id): + resp = client.get('/api/wire_search?bookmarks=%s' % str(user_id)) + assert resp.status_code == 200, f"Expected 200, got {resp.status_code}. Response: {resp.data}" data = json.loads(resp.get_data()) return data['_meta']['total'] @@ -73,6 +74,10 @@ def test_bookmarks(client, app): user_id = get_admin_user_id(app) assert user_id + with client.session_transaction() as session: + session['user'] = str(user_id) + session['user_type'] = 'administrator' + assert 0 == get_bookmarks_count(client, user_id) resp = client.post('/wire_bookmark', data=json.dumps({ @@ -82,7 +87,7 @@ def test_bookmarks(client, app): assert 1 == get_bookmarks_count(client, user_id) - client.delete('/wire_bookmark', data=json.dumps({ + resp = client.delete('/wire_bookmark', data=json.dumps({ 'items': [items[0]['_id']], }), content_type='application/json') assert resp.status_code == 200 @@ -109,24 +114,36 @@ def test_bookmarks_by_section(client, app): assert product_id == 1 with client.session_transaction() as session: - session['user'] = '59b4c5c61d41c8d736852fbf' - session['user_type'] = 'public' + session['user'] = str(ADMIN_USER_ID) + session['user_type'] = 'administrator' - assert 0 == get_bookmarks_count(client, PUBLIC_USER_ID) + with client.session_transaction() as session: + print(f"Session user: {session.get('user')}") + print(f"Session user type: {session.get('user_type')}") - resp = client.post('/wire_bookmark', data=json.dumps({ - 'items': [items[0]['_id']], - }), content_type='application/json') - assert resp.status_code == 200 + with client: + initial_count = get_bookmarks_count(client, ADMIN_USER_ID) + assert initial_count == 0, f"Expected 0 bookmarks, got {initial_count}" - assert 1 == 
get_bookmarks_count(client, PUBLIC_USER_ID) + resp = client.post('/wire_bookmark', + data=json.dumps({ + 'items': [items[0]['_id']], + }), + content_type='application/json') + assert resp.status_code == 200, f"Expected 200, got {resp.status_code}. Response: {resp.data}" - client.delete('/wire_bookmark', data=json.dumps({ - 'items': [items[0]['_id']], - }), content_type='application/json') - assert resp.status_code == 200 + new_count = get_bookmarks_count(client, ADMIN_USER_ID) + assert new_count == 1, f"Expected 1 bookmark, got {new_count}" - assert 0 == get_bookmarks_count(client, PUBLIC_USER_ID) + resp = client.delete('/wire_bookmark', + data=json.dumps({ + 'items': [items[0]['_id']], + }), + content_type='application/json') + assert resp.status_code == 200, f"Expected 200, got {resp.status_code}. Response: {resp.data}" + + final_count = get_bookmarks_count(client, ADMIN_USER_ID) + assert final_count == 0, f"Expected 0 bookmarks, got {final_count}" def test_item_copy(client, app): @@ -586,19 +603,26 @@ def test_search_by_products_and_filtered_by_embargoe(client, app): 'embargoed': (datetime.now() + timedelta(days=10)).replace(tzinfo=pytz.UTC), 'products': [{'code': '10'}] }]) - items = get_resource_service('wire_search').get_product_items(10, 20) - assert 0 == len(items) - # ex-embargoed item is fetched + mock_user = {'_id': 'test_user_id', 'user_type': 'administrator'} + + with patch('newsroom.wire.search.get_user') as mock_get_user: + mock_get_user.return_value = mock_user + items = get_resource_service('wire_search').get_product_items(10, 20) + assert 0 == len(items) + app.data.insert('items', [{ '_id': 'bar', 'headline': 'china story', 'embargoed': (datetime.now() - timedelta(days=10)).replace(tzinfo=pytz.UTC), 'products': [{'code': '10'}] }]) - items = get_resource_service('wire_search').get_product_items(10, 20) - assert 1 == len(items) - assert items[0]['headline'] == 'china story' + + with patch('newsroom.wire.search.get_user') as mock_get_user: + 
mock_get_user.return_value = mock_user + items = get_resource_service('wire_search').get_product_items(10, 20) + assert 1 == len(items) + assert items[0]['headline'] == 'china story' def test_wire_delete(client, app): @@ -666,4 +690,4 @@ def test_embed_mark_disable_download(client, app): resp = client.get('/wire/search?type=wire') data = json.loads(resp.get_data()) assert "data-disable-download" in data['_items'][0]['body_html'] - assert data['_items'][0]['body_html'].count("data-disable-download") == 1 + assert data['_items'][0]['body_html'].count("data-disable-download") == 2 diff --git a/webpack.config.js b/webpack.config.js index 86a166c7d..dec4e1238 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -1,5 +1,3 @@ -/* eslint-env node */ - const path = require('path'); const webpack = require('webpack'); const ManifestPlugin = require('webpack-manifest-plugin'); @@ -8,6 +6,7 @@ const NODE_MODULES = process.env.NODE_MODULES || 'node_modules'; module.exports = { entry: { newsroom_js: './assets/index.js', + newsroom_images: './assets/images.js', companies_js: './assets/companies/index.js', users_js: './assets/users/index.js', products_js: './assets/products/index.js', @@ -76,11 +75,26 @@ module.exports = { 'sass-loader', ], }, + { + test: /\.(png|jpe?g|gif|svg)$/i, + use: [ + { + loader: 'file-loader', + options: { + name: '[name].[hash].[ext]', + }, + }, + ], + }, + ] }, resolve: { - extensions: ['.js', '.jsx'], + extensions: ['.js', '.jsx', '.json', '.png', '.jpg', '.gif', '.svg'], modules: [path.resolve(__dirname, 'assets'), NODE_MODULES], + alias: { + assets: path.resolve(__dirname, 'assets'), + }, }, resolveLoader: { modules: [NODE_MODULES],