diff --git a/ocd_backend/extractors/ibabs.py b/ocd_backend/extractors/ibabs.py index a510724f..0dba9b54 100644 --- a/ocd_backend/extractors/ibabs.py +++ b/ocd_backend/extractors/ibabs.py @@ -4,6 +4,7 @@ from collections import OrderedDict from hashlib import sha1 +import redis from zeep.client import Client, Settings from zeep.exceptions import Error from zeep.helpers import serialize_object @@ -16,6 +17,8 @@ from ocd_backend.utils.ibabs import ( meeting_to_dict, list_entry_response_to_dict, votes_to_dict) from ocd_backend.utils.misc import json_encoder +from ocd_backend.settings import SOURCES_CONFIG_FILE, \ + DEFAULT_INDEX_PREFIX, DUMPS_DIR, REDIS_HOST, REDIS_PORT log = get_source_logger('extractor') @@ -51,6 +54,12 @@ def __init__(self, *args, **kwargs): except Error as e: log.error(f'Unable to instantiate iBabs client: {str(e)}') + try: + self.redis_client = redis.StrictRedis( + host=REDIS_HOST, port=REDIS_PORT, db=1, decode_responses=True) + except Exception as e: + log.error(f'Unable to instantiate Redis client: {str(e)}') + def _make_hash(self, report_dict): """ Make a hash value for a dict. This can be usedc to compare dicts to an @@ -61,6 +70,14 @@ def _make_hash(self, report_dict): h.update(json_encoder.encode(ordered_report_dict).encode('ascii')) return h.hexdigest() + def check_if_most_recent(self, site_name, id, report_dict): + redis_key = "%s|%s" % (site_name, id,) + old_hash = self.redis_client.get(redis_key) + new_hash = self._make_hash(report_dict) + if old_hash != new_hash: + self.redis_client.set(redis_key, new_hash) + return (old_hash != new_hash) + class IBabsCommitteesExtractor(IBabsBaseExtractor): """ @@ -193,11 +210,13 @@ def run(self): continue meeting_dict['Meetingtype'] = meeting_types[meeting_dict['MeetingtypeId']] - yield 'application/json', \ - json.dumps(meeting_dict), \ - None, \ - 'ibabs/' + cached_path, - + if self.check_if_most_recent(self.source_definition['ibabs_sitename'], meeting['Id'], meeting_dict): + yield 'application/json', \ + json.dumps(meeting_dict), \ + None, \ + 'ibabs/' + cached_path, + else: + log.info('Skipped meeting %s because we have it already' % (meeting['Id'],)) meeting_count += 1 log.info(f'[{self.source_definition["key"]}] Extracted total of {meeting_count} ibabs meetings. Also skipped {meetings_skipped} meetings.') @@ -311,8 +330,13 @@ def run(self): continue log.info(self._make_hash(report_dict)) + is_newer = self.check_if_most_recent( + self.source_definition['ibabs_sitename'], item['id'], report_dict) # identifier = item['id'][0] - yield 'application/json', json_encoder.encode(report_dict), None, 'ibabs/' + cached_path + if is_newer: + yield 'application/json', json_encoder.encode(report_dict), None, 'ibabs/' + cached_path + else: + log.info('Skipped %s because we already have that one in this version' % (item['id'],)) yield_count += 1 total_yield_count += 1 result_count += 1