Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option keep-current to clearsource_history command #484

Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Rename keep-actual to keep-current and updated documentation
  • Loading branch information
seitenbau-govdata committed Dec 22, 2021
commit d2b7340509db9b0989d129034443328090816e6e
4 changes: 3 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
@@ -242,14 +242,16 @@ The following operations can be run from the command line as described underneat
- clears all datasets, jobs and objects related to a harvest source,
but keeps the source itself

harvester clearsource-history [{source-id}]
harvester clearsource-history [{source-id}] [-k]
- If no source id is given, the history for all harvest sources (maximum is 1000)
will be cleared.
Clears all jobs and objects related to a harvest source, but keeps the source
itself. The datasets imported from the harvest source will **NOT** be deleted!!!
If a source id is given, it only clears the history of the harvest source with
the given source id.

To keep the currently active jobs, use the -k option.

harvester sources [all]
- lists harvest sources
If 'all' is defined, it also shows the Inactive sources
6 changes: 3 additions & 3 deletions ckanext/harvest/cli.py
Original file line number Diff line number Diff line change
@@ -110,11 +110,11 @@ def clear(ctx, id):
@click.argument(u"id", metavar=u"SOURCE_ID_OR_NAME", required=False)
@click.option(
"-k",
"--keep-actual",
"--keep-current",
default=False
)
@click.pass_context
def clear_history(ctx, id, keep_actual):
def clear_history(ctx, id, keep_current):
"""If no source id is given the history for all harvest sources
(maximum is 1000) will be cleared.

@@ -127,7 +127,7 @@ def clear_history(ctx, id, keep_actual):
flask_app = ctx.meta["flask_app"]

with flask_app.test_request_context():
result = utils.clear_harvest_source_history(id, bool(keep_actual))
result = utils.clear_harvest_source_history(id, bool(keep_current))
click.secho(result, fg="green")


12 changes: 7 additions & 5 deletions ckanext/harvest/commands/harvester.py
Original file line number Diff line number Diff line change
@@ -34,12 +34,14 @@ class Harvester(CkanCommand):
- clears all datasets, jobs and objects related to a harvest source,
but keeps the source itself

harvester clearsource_history [{source-id}]
harvester clearsource_history [{source-id}] [-k]
- If no source id is given the history for all harvest sources (maximum is 1000) will be cleared.
Clears all jobs and objects related to a harvest source, but keeps the source itself.
The datasets imported from the harvest source will NOT be deleted!!!
If a source id is given, it only clears the history of the harvest source with the given source id.

To keep the currently active jobs use the -k option.

harvester sources [all]
- lists harvest sources
If 'all' is defined, it also shows the Inactive sources
@@ -192,8 +194,8 @@ def __init__(self, name):

self.parser.add_option(
"-k",
"--keep-actual",
dest="keep_actual",
"--keep-current",
dest="keep_current",
default=False,
help="Do not delete relevant harvest objects",
)
@@ -324,12 +326,12 @@ def create_harvest_source(self):
print(result)

def clear_harvest_source_history(self):
keep_actual = bool(self.options.keep_actual)
keep_current = bool(self.options.keep_current)
source_id = None
if len(self.args) >= 2:
source_id = six.text_type(self.args[1])

print(utils.clear_harvest_source_history(source_id, keep_actual))
print(utils.clear_harvest_source_history(source_id, keep_current))

def show_harvest_source(self):

8 changes: 4 additions & 4 deletions ckanext/harvest/logic/action/update.py
Original file line number Diff line number Diff line change
@@ -322,7 +322,7 @@ def harvest_sources_job_history_clear(context, data_dict):
'''
check_access('harvest_sources_clear', context, data_dict)

keep_actual = data_dict.get('keep_actual', False)
keep_current = data_dict.get('keep_current', False)

job_history_clear_results = []
# We assume that the maximum of 1000 (hard limit) rows should be enough
@@ -332,7 +332,7 @@ def harvest_sources_job_history_clear(context, data_dict):
for data_dict in harvest_packages:
try:
clear_result = get_action('harvest_source_job_history_clear')(
context, {'id': data_dict['id'], 'keep_actual': keep_actual})
context, {'id': data_dict['id'], 'keep_current': keep_current})
job_history_clear_results.append(clear_result)
except NotFound:
# Ignoring non-existent harvest sources because of a possibly corrupt search index
@@ -355,7 +355,7 @@ def harvest_source_job_history_clear(context, data_dict):
check_access('harvest_source_clear', context, data_dict)

harvest_source_id = data_dict.get('id', None)
keep_actual = data_dict.get('keep_actual', False)
keep_current = data_dict.get('keep_current', False)

source = HarvestSource.get(harvest_source_id)
if not source:
@@ -366,7 +366,7 @@ def harvest_source_job_history_clear(context, data_dict):

model = context['model']

if keep_actual:
if keep_current:
sql = '''BEGIN;
DELETE FROM harvest_object_error WHERE harvest_object_id
IN (SELECT id FROM harvest_object AS obj WHERE harvest_source_id = '{harvest_source_id}'
16 changes: 8 additions & 8 deletions ckanext/harvest/tests/test_action.py
Original file line number Diff line number Diff line change
@@ -328,7 +328,7 @@ def test_harvest_sources_job_history_clear(self):
assert dataset_from_db_2
assert dataset_from_db_2.id == dataset_2['id']

def test_harvest_sources_job_history_clear_keep_actual(self):
def test_harvest_sources_job_history_clear_keep_current(self):
# prepare
data_dict = SOURCE_DICT.copy()
source_1 = factories.HarvestSourceObj(**data_dict)
@@ -354,7 +354,7 @@ def test_harvest_sources_job_history_clear_keep_actual(self):
context = {'model': model, 'session': model.Session,
'ignore_auth': True, 'user': ''}
result = get_action('harvest_sources_job_history_clear')(
context, {'keep_actual': True})
context, {'keep_current': True})

# verify
assert sorted(result, key=lambda item: item['id']) == sorted(
@@ -373,7 +373,7 @@ def test_harvest_sources_job_history_clear_keep_actual(self):
assert not harvest_model.HarvestJob.get(job_2.id)
assert not harvest_model.HarvestObject.get(object_2_.id)

def test_harvest_source_job_history_clear_keep_actual(self):
def test_harvest_source_job_history_clear_keep_current(self):
# prepare
source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
job = factories.HarvestJobObj(source=source)
@@ -398,7 +398,7 @@ def test_harvest_source_job_history_clear_keep_actual(self):
context = {'model': model, 'session': model.Session,
'ignore_auth': True, 'user': ''}
result = get_action('harvest_source_job_history_clear')(
context, {'id': source.id, 'keep_actual': True})
context, {'id': source.id, 'keep_current': True})

# verify
assert result == {'id': source.id}
@@ -416,7 +416,7 @@ def test_harvest_source_job_history_clear_keep_actual(self):
assert dataset_from_db_2
assert dataset_from_db_2.id == dataset2['id']

def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self):
def test_harvest_source_job_history_clear_keep_current_finished_jobs(self):
# prepare
source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
job = factories.HarvestJobObj(source=source)
@@ -440,7 +440,7 @@ def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self):
context = {'model': model, 'session': model.Session,
'ignore_auth': True, 'user': ''}
result = get_action('harvest_source_job_history_clear')(
context, {'id': source.id, 'keep_actual': True})
context, {'id': source.id, 'keep_current': True})

# verify
assert result == {'id': source.id}
@@ -457,7 +457,7 @@ def test_harvest_source_job_history_clear_keep_actual_finished_jobs(self):
assert dataset_from_db_2
assert dataset_from_db_2.id == dataset2['id']

def test_harvest_source_job_history_clear_keep_actual_running_job(self):
def test_harvest_source_job_history_clear_keep_current_running_job(self):
# Both jobs contain current objects
# prepare
source = factories.HarvestSourceObj(**SOURCE_DICT.copy())
@@ -489,7 +489,7 @@ def test_harvest_source_job_history_clear_keep_actual_running_job(self):
context = {'model': model, 'session': model.Session,
'ignore_auth': True, 'user': ''}
result = get_action('harvest_source_job_history_clear')(
context, {'id': source.id, 'keep_actual': True})
context, {'id': source.id, 'keep_current': True})

# verify that both jobs still exist
assert result == {'id': source.id}
8 changes: 4 additions & 4 deletions ckanext/harvest/utils.py
Original file line number Diff line number Diff line change
@@ -206,7 +206,7 @@ def clear_harvest_source(source_id_or_name):
tk.get_action("harvest_source_clear")(context, {"id": source["id"]})


def clear_harvest_source_history(source_id, keep_actual):
def clear_harvest_source_history(source_id, keep_current):

context = {
"model": model,
@@ -216,17 +216,17 @@ def clear_harvest_source_history(source_id, keep_actual):
if source_id is not None:
tk.get_action("harvest_source_job_history_clear")(context, {
"id": source_id,
"keep_actual": keep_actual
"keep_current": keep_current
})
return "Cleared job history of harvest source: {0}".format(source_id)
else:
# Purge queues, because we clean all harvest jobs and
# objects in the database.
if not keep_actual:
if not keep_current:
purge_queues()
cleared_sources_dicts = tk.get_action(
"harvest_sources_job_history_clear")(context, {
"keep_actual": keep_actual
"keep_current": keep_current
})
return "Cleared job history for all harvest sources: {0} source(s)".format(
len(cleared_sources_dicts))