-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use frictionless framework v5 #69
Changes from 25 commits
bda364a
a8d8e09
5021f95
259016b
6e5a68a
d57a3cf
358bd0d
7c63f1a
57030c6
ac7bd2b
972d094
cfa307d
92af068
e602d62
cc3ff3d
5bbe208
f75f84f
d7df909
cfd4305
1a027f9
876ff6b
28608e9
ed56be1
8571b00
91ba8f5
754fa14
694d829
abd199b
c9a5d13
24f2f5e
9728099
8e84b50
70e7386
ae20876
829722f
8b4bf81
df38480
6fd1bc2
90dcdae
2152a7f
ee4beb2
2e4a9b9
9df3b3e
255b946
a067c5c
de8793a
372f40e
2cb684f
bf2a50f
7a58f7c
17c33ee
ca59320
4c63d6e
26f0203
e9575a1
cf90c6b
e774828
b33dd11
08af965
3da4388
c3b6807
fff8dc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,8 @@ | |
|
||
import requests | ||
from sqlalchemy.orm.exc import NoResultFound | ||
from goodtables import validate | ||
# from goodtables import validate | ||
from frictionless import validate, system, Report | ||
aivuk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
from ckan.model import Session | ||
import ckan.lib.uploader as uploader | ||
|
@@ -22,7 +23,7 @@ | |
|
||
def run_validation_job(resource): | ||
|
||
log.debug(u'Validating resource %s', resource['id']) | ||
log.debug('Validating resource %s', resource['id']) | ||
|
||
try: | ||
validation = Session.query(Validation).filter( | ||
|
@@ -33,18 +34,18 @@ def run_validation_job(resource): | |
if not validation: | ||
validation = Validation(resource_id=resource['id']) | ||
|
||
validation.status = u'running' | ||
validation.status = 'running' | ||
Session.add(validation) | ||
Session.commit() | ||
|
||
options = t.config.get( | ||
u'ckanext.validation.default_validation_options') | ||
'ckanext.validation.default_validation_options') | ||
if options: | ||
options = json.loads(options) | ||
else: | ||
options = {} | ||
|
||
resource_options = resource.get(u'validation_options') | ||
resource_options = resource.get('validation_options') | ||
if resource_options and isinstance(resource_options, str): | ||
resource_options = json.loads(resource_options) | ||
if resource_options: | ||
|
@@ -54,54 +55,62 @@ def run_validation_job(resource): | |
{'ignore_auth': True}, {'id': resource['package_id']}) | ||
|
||
source = None | ||
if resource.get(u'url_type') == u'upload': | ||
if resource.get('url_type') == 'upload': | ||
upload = uploader.get_resource_uploader(resource) | ||
if isinstance(upload, uploader.ResourceUpload): | ||
source = upload.get_path(resource[u'id']) | ||
source = upload.get_path(resource['id']) | ||
else: | ||
# Upload is not the default implementation (ie it's a cloud storage | ||
# implementation) | ||
pass_auth_header = t.asbool( | ||
t.config.get(u'ckanext.validation.pass_auth_header', True)) | ||
if dataset[u'private'] and pass_auth_header: | ||
t.config.get('ckanext.validation.pass_auth_header', True)) | ||
if dataset['private'] and pass_auth_header: | ||
s = requests.Session() | ||
s.headers.update({ | ||
u'Authorization': t.config.get( | ||
u'ckanext.validation.pass_auth_header_value', | ||
'Authorization': t.config.get( | ||
'ckanext.validation.pass_auth_header_value', | ||
_get_site_user_api_key()) | ||
}) | ||
|
||
options[u'http_session'] = s | ||
options['http_session'] = s | ||
|
||
if not source: | ||
source = resource[u'url'] | ||
|
||
schema = resource.get(u'schema') | ||
if schema and isinstance(schema, str): | ||
if schema.startswith('http'): | ||
r = requests.get(schema) | ||
schema = r.json() | ||
source = resource['url'] | ||
|
||
schema = resource.get('schema') | ||
if schema: | ||
if isinstance(schema, str): | ||
if schema.startswith('http'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, if |
||
r = requests.get(schema) | ||
schema = r.json() | ||
else: | ||
schema = json.loads(schema) | ||
schema = json.dumps(schema) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic is wrong,
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks to me like this code is handling all of those cases, except that it will not consistently result in a dict.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I saw that the type of the schema parameter could be a str, but I have now noticed that if it's a str, frictionless treats it as a path to a schema file. You can't pass a dict directly to the validate function; you need to load it with Schema.from_descriptor first. I fixed it in my last commit. |
||
|
||
_format = resource[u'format'].lower() | ||
_format = resource['format'].lower() | ||
|
||
report = _validate_table(source, _format=_format, schema=schema, **options) | ||
|
||
# Hide uploaded files | ||
for table in report.get('tables', []): | ||
if table['source'].startswith('/'): | ||
table['source'] = resource['url'] | ||
for index, warning in enumerate(report.get('warnings', [])): | ||
report['warnings'][index] = re.sub(r'Table ".*"', 'Table', warning) | ||
|
||
if report['table-count'] > 0: | ||
validation.status = u'success' if report[u'valid'] else u'failure' | ||
validation.report = report | ||
if type(report) == Report: | ||
report = report.to_dict() | ||
if 'tables' in report: | ||
for table in report['tables']: | ||
if table['source'].startswith('/'): | ||
table['source'] = resource['url'] | ||
if 'warnings' in report: | ||
for index, warning in enumerate(report['warnings']): | ||
report['warnings'][index] = re.sub(r'Table ".*"', 'Table', warning) | ||
if 'valid' in report and report['valid']: | ||
validation.status = 'success' if report['valid'] else 'failure' | ||
validation.report = json.dumps(report) | ||
else: | ||
validation.status = u'error' | ||
validation.error = { | ||
'message': '\n'.join(report['warnings']) or u'No tables found'} | ||
validation.status = 'error' | ||
validation.report = json.dumps(report) | ||
if 'tables' in report: | ||
validation.error = { | ||
'message': [str(err) for err in report['tables'][0]['errors']] if len(report['tables'][0]['errors']) > 0 else 'No tables found'} | ||
else: | ||
validation.error = {'message': []} | ||
aivuk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
validation.finished = datetime.datetime.utcnow() | ||
|
||
Session.add(validation) | ||
|
@@ -117,18 +126,21 @@ def run_validation_job(resource): | |
'validation_timestamp': validation.finished.isoformat()}) | ||
|
||
|
||
def _validate_table(source, _format=u'csv', schema=None, **options): | ||
def _validate_table(source, _format='csv', schema=None, **options): | ||
|
||
frictionless_context = { 'trusted': True } | ||
aivuk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
http_session = options.pop('http_session', None) or requests.Session() | ||
|
||
use_proxy = 'ckan.download_proxy' in t.config | ||
if use_proxy: | ||
proxy = t.config.get('ckan.download_proxy') | ||
log.debug(u'Download resource for validation via proxy: %s', proxy) | ||
log.debug('Download resource for validation via proxy: %s', proxy) | ||
http_session.proxies.update({'http': proxy, 'https': proxy}) | ||
report = validate(source, format=_format, schema=schema, http_session=http_session, **options) | ||
frictionless_context['http_session'] = http_session | ||
aivuk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
log.debug(u'Validating source: %s', source) | ||
with system.use_context(**frictionless_context): | ||
report = validate(source, format=_format, schema=schema, **options) | ||
log.debug('Validating source: %s', source) | ||
|
||
return report | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -656,26 +656,31 @@ def _run_sync_validation(resource_id, local_upload=False, new_resource=True): | |
{u'ignore_auth': True}, | ||
{u'resource_id': resource_id}) | ||
|
||
report = validation['report'] | ||
if validation['report']: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there an instance when |
||
report = json.loads(validation['report']) | ||
|
||
if not report['valid']: | ||
if not report['valid']: | ||
|
||
# Delete validation object | ||
t.get_action(u'resource_validation_delete')( | ||
{u'ignore_auth': True}, | ||
{u'resource_id': resource_id} | ||
) | ||
# Delete validation object | ||
t.get_action(u'resource_validation_delete')( | ||
{u'ignore_auth': True}, | ||
{u'resource_id': resource_id} | ||
) | ||
|
||
# Delete uploaded file | ||
if local_upload: | ||
delete_local_uploaded_file(resource_id) | ||
# Delete uploaded file | ||
if local_upload: | ||
delete_local_uploaded_file(resource_id) | ||
|
||
if new_resource: | ||
# Delete resource | ||
t.get_action(u'resource_delete')( | ||
{u'ignore_auth': True, 'user': None}, | ||
{u'id': resource_id} | ||
) | ||
if new_resource: | ||
# Delete resource | ||
t.get_action(u'resource_delete')( | ||
{u'ignore_auth': True, 'user': None}, | ||
{u'id': resource_id} | ||
) | ||
|
||
raise t.ValidationError({ | ||
u'validation': [report]}) | ||
else: | ||
raise t.ValidationError({ | ||
u'validation': [report]}) | ||
'validation': [] | ||
}) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
{% import 'macros/form.html' as form %} | ||
|
||
{% set value = data[field.field_name] %} | ||
{% set is_url = value and value[4:]|lower == 'http' %} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is still used below right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ping, I think this line should stay |
||
{% set is_json = not is_url and value %} | ||
|
||
<div class="image-upload" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This flag is concerning. Why can't conflicts be resolved? There should at least be a comment to explain why this is being done and what would be needed to address it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ThrawnCA this will be solved with upcoming requirement changes in frictionless-py. We just added this to not block the tests on this branch.
@aivuk it would be good to sort this out in frictionless-py. IIRC it's just a matter of loosening up the requirements.