Skip to content

Commit

Permalink
[SE-3520] Fixes Transcripts Incompletely Uploaded to S3 Bucket (#266)
Browse files Browse the repository at this point in the history
* Encodes File Content before Passing It for Upload

boto/boto#2868

* Adds unit test to ensure transcript content file is utf-8 encoded

* Updates Version in python setup file to v1.4.3
  • Loading branch information
nizarmah authored Nov 5, 2020
1 parent fd7f90c commit dd8b424
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 3 deletions.
2 changes: 1 addition & 1 deletion edxval/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,7 @@ def import_transcript_from_fs(edx_video_id, language_code, file_name, provider,
video_id=edx_video_id,
language_code=language_code,
file_format=file_format,
content=ContentFile(file_content),
content=ContentFile(file_content.encode('utf-8')),
provider=provider
)

Expand Down
42 changes: 41 additions & 1 deletion edxval/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from io import open
from tempfile import mkdtemp

import chardet
import mock
from ddt import data, ddt, unpack
from django.conf import settings
Expand All @@ -24,7 +25,7 @@
from fs.osfs import OSFS
from fs.path import combine
from lxml import etree
from mock import patch
from mock import Mock, patch
from rest_framework import status

from edxval import api, utils
Expand Down Expand Up @@ -1895,6 +1896,45 @@ def test_import_transcript_from_fs_resource_not_found(self, mock_logger):
edx_video_id
)

@patch('edxval.api.create_video_transcript')
@patch('edxval.api.get_transcript_format', Mock())
def test_import_transcript_from_fs_created_transcript_content_encoding(self, mock_create_video_transcript):
    """
    Verify that `import_transcript_from_fs` passes `create_video_transcript` content detected as `utf-8`.
    """
    edx_video_id = constants.VIDEO_DICT_FISH['edx_video_id']
    language_code = 'en'
    transcript_file_name = 'transcript.txt'
    static_dir = constants.EXPORT_IMPORT_STATIC_DIR

    # The text deliberately contains non-ascii characters so that chardet
    # reports the content as utf-8 instead of plain ascii.
    video_transcript = dict(
        constants.VIDEO_TRANSCRIPT_CUSTOM_SJSON,
        video_id=edx_video_id,
        file_data='Hello, edX greets you. random utf-8 characters: éâô'
    )

    # Write the transcript into the file system that is handed to the import call below.
    utils.create_file_in_fs(
        video_transcript['file_data'],
        transcript_file_name,
        self.file_system,
        static_dir
    )

    api.import_transcript_from_fs(
        edx_video_id=edx_video_id,
        language_code=language_code,
        file_name=transcript_file_name,
        provider=TranscriptProviderType.CUSTOM,
        resource_fs=self.file_system,
        static_dir=static_dir
    )

    # Inspect the `content` keyword argument the mocked creator was called with.
    passed_kwargs = mock_create_video_transcript.call_args.kwargs
    raw_content = passed_kwargs['content'].read()
    detected = chardet.detect(raw_content)

    self.assertEqual(detected['encoding'], 'utf-8')

@patch('edxval.api.logger')
def test_import_transcript_from_fs_invalid_format(self, mock_logger):
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def load_requirements(*requirements_paths):
return list(requirements)


VERSION = '1.4.2'
VERSION = '1.4.3'

if sys.argv[-1] == 'tag':
print("Tagging the version on github:")
Expand Down

0 comments on commit dd8b424

Please sign in to comment.