diff --git a/.travis.yml b/.travis.yml index 9d68bdff..50336ab6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,10 +12,10 @@ env: - db_user=postgres addons: - postgresql: '9.4' + postgresql: '9.6' apt: packages: - - postgresql-9.4-postgis-2.3 + - postgresql-9.6-postgis-2.3 before_script: - psql -U postgres -c "create extension postgis" @@ -25,8 +25,8 @@ install: - pip install -r tests/requirements.txt --upgrade - pip install -e . -sudo: required -dist: trusty +sudo: required +dist: trusty group: deprecated-2017Q4 script: diff --git a/README.rst b/README.rst index ce0e64e6..603df370 100644 --- a/README.rst +++ b/README.rst @@ -88,15 +88,6 @@ Run the management command to update the test fixture. Run the tests and commit your updated fixture with your PR! -Team ----- - -- Forest Gregg, DataMade - Open Civic Data (OCD) and Legistar scraping -- Cathy Deng, DataMade - data models and loading -- Derek Eder, DataMade - front end -- Eric van Zanten, DataMade - search and dev ops - - Patches and Contributions ------------------------- We continue to improve django-councilmatic, and we welcome your ideas! You can make suggestions in the form of `github issues `_ (bug reports, feature requests, general questions), or you can submit a code contribution via a pull request. diff --git a/councilmatic_core/haystack_indexes.py b/councilmatic_core/haystack_indexes.py index 46e69c05..f7cb052a 100644 --- a/councilmatic_core/haystack_indexes.py +++ b/councilmatic_core/haystack_indexes.py @@ -65,3 +65,9 @@ def prepare_ocr_full_text(self, obj): def get_updated_field(self): return 'updated_at' + + def prepare_last_action_date(self, obj): + # Solr seems to be fussy about the time format, and we do not need the time, just the date stamp. 
+ # https://lucene.apache.org/solr/guide/7_5/working-with-dates.html#date-formatting + if obj.last_action_date: + return obj.last_action_date.date() diff --git a/councilmatic_core/management/commands/convert_attachment_text.py b/councilmatic_core/management/commands/convert_attachment_text.py index 2a11bab2..1bb9fc2f 100644 --- a/councilmatic_core/management/commands/convert_attachment_text.py +++ b/councilmatic_core/management/commands/convert_attachment_text.py @@ -1,27 +1,24 @@ -import os +import itertools import logging import logging.config -import sqlalchemy as sa +import os import requests import tempfile -import itertools +import tqdm -from django.core.management.base import BaseCommand from django.conf import settings +from django.core.management.base import BaseCommand +from django.db import connection from django.db.models import Max, Q -from opencivicdata.legislative.models import BillDocumentLink -from councilmatic_core.models import BillDocument +from opencivicdata.legislative.models import BillDocumentLink, BillDocument + +# Configure logging logging.config.dictConfig(settings.LOGGING) logging.getLogger("requests").setLevel(logging.WARNING) logger = logging.getLogger(__name__) -DB_CONN = 'postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}' - -engine = sa.create_engine(DB_CONN.format(**settings.DATABASES['default']), - convert_unicode=True, - server_side_cursors=True) class Command(BaseCommand): help = 'Converts bill attachments into plain text' @@ -38,17 +35,23 @@ def handle(self, *args, **options): self.add_plain_text() def get_document_url(self): - # Only apply this query to most recently updated (or created) bill documents. + ''' + By default, convert text for recently updated files, or files that + do not have attachment text. Otherwise, convert text for all files. 
+ ''' max_updated = BillDocument.objects.all().aggregate(max_updated_at=Max('bill__updated_at'))['max_updated_at'] - is_null = Q(document__councilmatic_document__full_text__isnull=True) is_file = Q(url__iendswith='pdf') | Q(url__iendswith='docx') | Q(url__iendswith='docx') - after_max_update = Q(document__bill__updated_at__gt=max_updated) + is_null = Q(document__extras__full_text__isnull=True) + after_max_update = Q(document__bill__updated_at__gte=max_updated) if max_updated is None or self.update_all: - qs = BillDocumentLink.objects.filter(is_null & is_file) + qs = BillDocumentLink.objects.filter(is_file) else: - qs = BillDocumentLink.objects.filter(is_null & is_file & after_max_update) + # Always try to convert null files, because files may have failed + # in a reparable manner, e.g., Legistar server errors, during a + # previous conversion. + qs = BillDocumentLink.objects.filter(is_file & (after_max_update | is_null)) for item in qs: yield item.url, item.document.id @@ -58,15 +61,20 @@ def convert_document_to_plaintext(self): # installing it, import the library here. import textract - for document_data in self.get_document_url(): - document_data = dict(document_data) - url = document_data['url'] - document_id = document_data['id'] - response = requests.get(url) - # Sometimes, Metro Legistar has a URL that retuns a bad status code (e.g., 404 from http://metro.legistar1.com/metro/attachments/95d5007e-720b-4cdd-9494-c800392b9265.pdf). + for url, document_id in tqdm.tqdm(self.get_document_url()): + try: + response = requests.get(url) + except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): + # Don't fail due to server errors, as these tend to resolve themselves. 
+ # https://requests.readthedocs.io/en/master/user/quickstart/#errors-and-exceptions + logger.warning('Document URL {} raised a server error - Could not get attachment text!'.format(url)) + continue + + # Sometimes, Metro Legistar has a URL that returns a bad status code, + # e.g., 404 from http://metro.legistar1.com/metro/attachments/95d5007e-720b-4cdd-9494-c800392b9265.pdf. # Skip these documents. if response.status_code != 200: - logger.error('Document URL {} returns {} - Could not get attachment text!'.format(url, response.status_code)) + logger.warning('Document URL {} returns {} - Could not get attachment text!'.format(url, response.status_code)) continue extension = os.path.splitext(url)[1] @@ -77,27 +85,37 @@ def convert_document_to_plaintext(self): try: plain_text = textract.process(tfp.name) except textract.exceptions.ShellError as e: - logger.error('{} - Could not convert Councilmatic Document ID {}!'.format(e, document_id)) + logger.warning('{} - Could not convert Councilmatic Document ID {}!'.format(e, document_id)) + continue + except TypeError as e: + if 'decode() argument 1 must be str, not None' in str(e): + logger.warning('{} - Could not convert Councilmatic Document ID {}!'.format(e, document_id)) + continue + else: + raise + except UnicodeDecodeError as e: + logger.warning('{} - Could not convert Councilmatic Document ID {}!'.format(e, document_id)) continue logger.info('Councilmatic Document ID {} - conversion complete'.format(document_id)) - yield {'plain_text': plain_text.decode('utf-8'), 'id': document_id} + yield (plain_text.decode('utf-8'), document_id) def add_plain_text(self): ''' - Metro has over 2,000 attachments that should be converted into plain text. - When updating all documents with `--update_all`, this function insures that the database updates only 20 documents per connection (mainly, to avoid unexpected memory consumption). 
- It fetches up to 20 elements from a generator object, runs the UPDATE query, and then fetches up to 20 more. - - Inspired by: https://stackoverflow.com/questions/30510593/how-can-i-use-server-side-cursors-with-django-and-psycopg2/41088159#41088159 - - More often, this script updates just a handful of documents: so, the incremental, fetch-just-20 approach may prove unnecessary. Possible refactor? + Metro has over 2,000 attachments that should be converted into plain + text. When updating all documents with `--update_all`, this function + ensures that the database updates only 20 documents per connection + (mainly, to avoid unexpected memory consumption). It fetches up to 20 + elements from a generator object, runs the UPDATE query, and then + fetches up to 20 more. + + Inspired by https://stackoverflow.com/questions/30510593/how-can-i-use-server-side-cursors-with-django-and-psycopg2/41088159#41088159 ''' update_statement = ''' - UPDATE councilmatic_core_billdocument AS bill_docs - SET full_text = :plain_text - WHERE bill_docs.document_id = :id + UPDATE opencivicdata_billdocument AS bill_docs + SET extras = jsonb_set(extras, '{full_text}', to_jsonb(cast(%s as text))) + WHERE bill_docs.id = %s ''' plaintexts = self.convert_document_to_plaintext() @@ -109,7 +127,7 @@ def add_plain_text(self): if not plaintexts_fetched_from_generator: break else: - with engine.begin() as connection: - connection.execute(sa.text(update_statement), plaintexts_fetched_from_generator) + with connection.cursor() as cursor: + cursor.executemany(update_statement, plaintexts_fetched_from_generator) logger.info('SUCCESS') diff --git a/councilmatic_core/migrations/0049_auto_20191114_1142.py b/councilmatic_core/migrations/0049_auto_20191114_1142.py new file mode 100644 index 00000000..a980e54b --- /dev/null +++ b/councilmatic_core/migrations/0049_auto_20191114_1142.py @@ -0,0 +1,37 @@ +# Generated by Django 2.1.14 on 2019-11-14 19:42 + +from django.db import migrations, models + + +class 
Migration(migrations.Migration): + + dependencies = [ + ('councilmatic_core', '0048_post_shape'), + ] + + operations = [ + migrations.AlterModelOptions( + name='membership', + options={'base_manager_name': 'objects'}, + ), + migrations.AlterField( + model_name='bill', + name='slug', + field=models.SlugField(unique=True), + ), + migrations.AlterField( + model_name='event', + name='slug', + field=models.SlugField(max_length=200, unique=True), + ), + migrations.AlterField( + model_name='organization', + name='slug', + field=models.SlugField(max_length=200, unique=True), + ), + migrations.AlterField( + model_name='person', + name='slug', + field=models.SlugField(unique=True), + ), + ] diff --git a/councilmatic_core/migrations/0050_remove_billdocument.py b/councilmatic_core/migrations/0050_remove_billdocument.py new file mode 100644 index 00000000..2496c04b --- /dev/null +++ b/councilmatic_core/migrations/0050_remove_billdocument.py @@ -0,0 +1,16 @@ +# Generated by Django 2.2.9 on 2020-01-17 21:30 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('councilmatic_core', '0049_auto_20191114_1142'), + ] + + operations = [ + migrations.DeleteModel( + name='BillDocument', + ), + ] diff --git a/councilmatic_core/migrations/0051_bill_last_action_date.py b/councilmatic_core/migrations/0051_bill_last_action_date.py new file mode 100644 index 00000000..8bb7ff9e --- /dev/null +++ b/councilmatic_core/migrations/0051_bill_last_action_date.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.9 on 2020-01-30 19:03 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('councilmatic_core', '0050_remove_billdocument'), + ] + + operations = [ + migrations.AddField( + model_name='bill', + name='last_action_date', + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/councilmatic_core/models.py b/councilmatic_core/models.py index 1207ee7b..24aba3e0 100644 --- 
a/councilmatic_core/models.py +++ b/councilmatic_core/models.py @@ -7,7 +7,7 @@ from django.urls import reverse, NoReverseMatch from django.utils import timezone from django.db.models import Case, When -from django.db.models.functions import Cast +from django.db.models.functions import Cast, Now from django.utils.functional import cached_property from django.core.files.storage import FileSystemStorage @@ -22,7 +22,9 @@ class CastToDateTimeMixin: - def cast_to_datetime(self, field): + + @classmethod + def cast_to_datetime(cls, field): """ Cast a given field from a CharField to a DateTimeField, converting empty strings to NULL in the process. Useful for CharFields that store timestamps @@ -38,19 +40,7 @@ def cast_to_datetime(self, field): ) -class PersonManager(models.Manager): - def get_queryset(self, *args, **kwargs): - from django.db.models import Prefetch - - qs = super().get_queryset(*args, **kwargs) - - return qs.prefetch_related( - Prefetch('memberships', Membership.objects.filter(person__in=qs)) - ) - - class Person(opencivicdata.core.models.Person): - objects = PersonManager() person = models.OneToOneField(opencivicdata.core.models.Person, on_delete=models.CASCADE, @@ -61,7 +51,7 @@ class Person(opencivicdata.core.models.Person): storage=static_storage, default='images/headshot_placeholder.png') - slug = models.SlugField() + slug = models.SlugField(unique=True) def delete(self, **kwargs): kwargs['keep_parents'] = kwargs.get('keep_parents', True) @@ -70,6 +60,10 @@ def delete(self, **kwargs): def __str__(self): return self.name + @property + def current_memberships(self): + return self.memberships.filter(end_date_dt__gt=Now()) + @property def latest_council_seat(self): m = self.latest_council_membership @@ -77,9 +71,6 @@ def latest_council_seat(self): return m.post.label return '' - def is_speaker(self): - return True if self.memberships.filter(role='Speaker').first() else False - @property def headshot_source(self): sources = 
self.sources.filter(url=self.headshot.url) @@ -106,14 +97,14 @@ def sponsorship_sort(sponsorship): @property def chair_role_memberships(self): if hasattr(settings, 'COMMITTEE_CHAIR_TITLE'): - return self.memberships.filter(role=settings.COMMITTEE_CHAIR_TITLE).filter(end_date_dt__gt=timezone.now()) + return self.current_memberships.filter(role=settings.COMMITTEE_CHAIR_TITLE) else: return [] @property def member_role_memberships(self): if hasattr(settings, 'COMMITTEE_MEMBER_TITLE'): - return self.memberships.filter(role=settings.COMMITTEE_MEMBER_TITLE).filter(end_date_dt__gt=timezone.now()) + return self.current_memberships.filter(role=settings.COMMITTEE_MEMBER_TITLE) else: return [] @@ -132,22 +123,21 @@ def latest_council_membership(self): def current_council_seat(self): m = self.latest_council_membership if m and m.end_date_dt > timezone.now(): - return m.post.label - return '' + return m @property def link_html(self): return "{}".format(reverse('person', args=[self.slug]), self.name) -class Organization(opencivicdata.core.models.Organization): +class Organization(opencivicdata.core.models.Organization, CastToDateTimeMixin): organization = models.OneToOneField(opencivicdata.core.models.Organization, on_delete=models.CASCADE, related_name='councilmatic_organization', parent_link=True) - slug = models.SlugField(max_length=200) + slug = models.SlugField(max_length=200, unique=True) def delete(self, **kwargs): kwargs['keep_parents'] = kwargs.get('keep_parents', True) @@ -161,8 +151,11 @@ def committees(cls): """ grabs all organizations (1) classified as a committee & (2) with at least one member """ - return [o for o in cls.objects.filter(classification='committee') - if any([m.end_date_dt > timezone.now() for m in o.memberships.all()])] + return cls.objects\ + .filter(classification='committee')\ + .annotate(memberships_end_date_dt=cls.cast_to_datetime('memberships__end_date'))\ + .filter(memberships_end_date_dt__gte=Now())\ + .distinct() @property def 
recent_activity(self): @@ -209,10 +202,7 @@ def non_chair_members(self): @property def all_members(self): - if hasattr(settings, 'COMMITTEE_MEMBER_TITLE'): - return self.memberships.filter(end_date_dt__gt=timezone.now()) - else: - return [] + return self.memberships.filter(end_date_dt__gt=timezone.now()) @property def vice_chairs(self): @@ -280,9 +270,10 @@ def get_queryset(self): ) -class Membership(opencivicdata.core.models.Membership): +class Membership(opencivicdata.core.models.Membership, CastToDateTimeMixin): class Meta: proxy = True + base_manager_name = 'objects' objects = MembershipManager() @@ -327,7 +318,7 @@ class Event(opencivicdata.legislative.models.Event): related_name='councilmatic_event', parent_link=True) - slug = models.SlugField(max_length=200) + slug = models.SlugField(max_length=200, unique=True) def delete(self, **kwargs): kwargs['keep_parents'] = kwargs.get('keep_parents', True) @@ -391,8 +382,9 @@ class Bill(opencivicdata.legislative.models.Bill): related_name='councilmatic_bill', parent_link=True) - slug = models.SlugField() + slug = models.SlugField(unique=True) restrict_view = models.BooleanField(default=False) + last_action_date = models.DateTimeField(blank=True, null=True) def delete(self, **kwargs): kwargs['keep_parents'] = kwargs.get('keep_parents', True) @@ -469,11 +461,6 @@ def current_action(self): """ return self.actions.last() - @property - def last_action_date(self): - if self.current_action: - return self.current_action.date_dt - @property def first_action(self): """ @@ -606,6 +593,27 @@ def unique_related_upcoming_events(self): agenda_item__event__start_date__gte=timezone.now()).all()] return list(set(events)) + def get_last_action_date(self): + ''' + Return the date of the most recent action. If there is no action, + return the date of the most recent past event for which the bill + appears on the agenda. Otherwise, return None. 
+ ''' + current_action = self.current_action + + if current_action: + return current_action.date_dt + + try: + last_agenda = Event.objects.filter(start_time__lte=timezone.now(), + agenda__related_entities__bill=self)\ + .latest('start_time') + except Event.DoesNotExist: + return None + + else: + return last_agenda.start_time + class BillSponsorship(opencivicdata.legislative.models.BillSponsorship): class Meta: @@ -690,13 +698,3 @@ class Meta: organization = ProxyForeignKey(Organization, null=True, on_delete=models.SET_NULL) - - -class BillDocument(opencivicdata.legislative.models.BillDocument): - - document = models.OneToOneField(opencivicdata.legislative.models.BillDocument, - on_delete=models.CASCADE, - related_name='councilmatic_document', - parent_link=True) - - full_text = models.TextField(blank=True, null=True) diff --git a/councilmatic_core/signals/handlers.py b/councilmatic_core/signals/handlers.py index e312feb7..8e5caa30 100644 --- a/councilmatic_core/signals/handlers.py +++ b/councilmatic_core/signals/handlers.py @@ -6,7 +6,8 @@ Person as OCDPerson, Post as OCDPost) from opencivicdata.legislative.models import (Event as OCDEvent, - Bill as OCDBill) + Bill as OCDBill, + EventRelatedEntity as OCDEventRelatedEntity) from councilmatic_core.models import (Organization as CouncilmaticOrganization, Person as CouncilmaticPerson, @@ -38,6 +39,7 @@ def create_councilmatic_person(sender, instance, created, **kwargs): # just update the child table, not the parent table cp.save_base(raw=True) + @receiver(post_save, sender=OCDEvent) def create_councilmatic_event(sender, instance, created, **kwargs): if created: @@ -47,9 +49,16 @@ def create_councilmatic_event(sender, instance, created, **kwargs): ce = CouncilmaticEvent(event=instance, slug=slug) + # just update the child table, not the parent table ce.save_base(raw=True) + for entity in OCDEventRelatedEntity.objects.filter(agenda_item__event=instance, bill__isnull=False): + cb = entity.bill.councilmatic_bill + 
cb.last_action_date = cb.get_last_action_date() + cb.save_base(raw=True) + + @receiver(post_save, sender=OCDBill) def create_councilmatic_bill(sender, instance, created, **kwargs): if created: @@ -57,13 +66,16 @@ def create_councilmatic_bill(sender, instance, created, **kwargs): cb = CouncilmaticBill(bill=instance, slug=slug) - # just update the child table, not the parent table - cb.save_base(raw=True) - cb = CouncilmaticBill.objects.get(id=instance.id) else: cb = instance.councilmatic_bill + cb.last_action_date = cb.get_last_action_date() + + # just update the child table, not the parent table + cb.save_base(raw=True) + + @receiver(post_save, sender=OCDPost) def create_councilmatic_post(sender, instance, created, **kwargs): if created: diff --git a/councilmatic_core/templates/partials/tags.html b/councilmatic_core/templates/partials/tags.html index 6fbb432a..e84727fb 100644 --- a/councilmatic_core/templates/partials/tags.html +++ b/councilmatic_core/templates/partials/tags.html @@ -4,16 +4,12 @@

{{result.last_action_date|date:'n/d/Y'}} - {{result.object.current_action.description | remove_action_subj }}

-{% elif result.object.get_last_action_date %} -

- {{result.object.get_last_action_date|date:'n/d/Y'}} - {{result.object.current_action.description | remove_action_subj }} -

{% endif %} {% if result.object.primary_sponsor %}

- {{result.object.primary_sponsor.person.name}} + {{result.object.primary_sponsor.name}}

{% endif %} diff --git a/councilmatic_core/views.py b/councilmatic_core/views.py index f83e9503..f6757342 100644 --- a/councilmatic_core/views.py +++ b/councilmatic_core/views.py @@ -274,7 +274,7 @@ class CommitteesView(ListView): context_object_name = 'committees' def get_queryset(self): - return Organization.committees + return Organization.committees() class CommitteeDetailView(DetailView): diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..125b4602 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +pytest +pytest-django diff --git a/setup.py b/setup.py index 5f66774d..2a026596 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ author='DataMade, LLC', author_email='info@datamade.us', install_requires=['requests>=2.20,<2.21', - 'opencivicdata>=2.3.0,<2.4', + 'opencivicdata>=3.1.0', 'pytz>=2015.4', 'django-haystack>=2.8.0,<2.9', 'Django>=2.0,<2.2', @@ -29,7 +29,10 @@ 'psycopg2-binary>=2.7,<2.8', 'django-adv-cache-tag==1.1.2', 'boto==2.38.0', - 'sqlalchemy'], + 'sqlalchemy', + 'tqdm', + ], + extras_require = {'convert_docs': ['textract']}, classifiers=[ 'Environment :: Web Environment', 'Framework :: Django', diff --git a/tests/conftest.py b/tests/conftest.py index 4c527dd8..36837824 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,10 +7,10 @@ from django.conf import settings from django.db import connection -from councilmatic_core.models import Bill, Event, BillDocument +from councilmatic_core.models import Bill, Event from opencivicdata.core.models import Jurisdiction, Division from opencivicdata.legislative.models import BillDocumentLink, EventDocument, \ - EventDocumentLink, LegislativeSession, BillVersion + EventDocumentLink, LegislativeSession, BillVersion, BillDocument @pytest.fixture @@ -81,7 +81,7 @@ def metro_event(db, jurisdiction): @pytest.fixture @pytest.mark.django_db(transaction=True) -def metro_bill_document(metro_bill, transactional_db): +def ocd_bill_document(metro_bill, transactional_db): 
document_info = { 'bill_id': metro_bill.id, 'note': 'Board Report', diff --git a/tests/fixtures/test_data.json b/tests/fixtures/test_data.json index 647d5ec8..82f78406 100644 --- a/tests/fixtures/test_data.json +++ b/tests/fixtures/test_data.json @@ -18869,10 +18869,7 @@ "fields": { "date": "", "event": "ocd-event/3e3a8c6a-e49a-47ea-8229-c135899fb58b", - "media_type": "", - "note": "Agenda", - "text": "", - "url": "" + "note": "Agenda" }, "model": "legislative.eventdocument", "pk": "69da74fd-974c-4b08-b7aa-12ef5014f903" @@ -18881,10 +18878,7 @@ "fields": { "date": "", "event": "ocd-event/3e3a8c6a-e49a-47ea-8229-c135899fb58b", - "media_type": "", - "note": "Notice", - "text": "", - "url": "" + "note": "Notice" }, "model": "legislative.eventdocument", "pk": "7ebaa7fa-43ac-4e7a-954a-0e20e9445f54" diff --git a/tests/test_config.py b/tests/test_config.py index ad393727..953a4e96 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -12,10 +12,10 @@ DATABASES = { 'default': { 'ENGINE': 'django.contrib.gis.db.backends.postgis', - 'NAME': 'chicago_councilmatic', + 'NAME': 'django_councilmatic', 'USER': '', 'PASSWORD': '', - 'HOST': '', + 'HOST': 'localhost', 'PORT': 5432, } } diff --git a/tests/test_management_commands.py b/tests/test_management_commands.py index b6715d42..4c171b64 100644 --- a/tests/test_management_commands.py +++ b/tests/test_management_commands.py @@ -5,7 +5,7 @@ @pytest.mark.django_db -def test_refresh_pic(metro_bill_document, +def test_refresh_pic(ocd_bill_document, metro_event_document): ''' Test that the `_get_urls` and `_create_keys` successfully finds changed bill and event documents @@ -14,7 +14,7 @@ def test_refresh_pic(metro_bill_document, command = RefreshPic() document_urls = list(command._get_urls()) - bill_doc_link, = metro_bill_document.links.all() + bill_doc_link, = ocd_bill_document.links.all() event_doc_link, = metro_event_document.links.all() assert (bill_doc_link.url in document_urls) == True @@ -25,27 +25,21 @@ def 
test_refresh_pic(metro_bill_document, assert len(document_urls) == len(aws_keys) @pytest.mark.django_db(transaction=True) -def test_convert_attachment_text(metro_bill_document, mocker): +def test_convert_attachment_text(ocd_bill_document, mocker, transactional_db): command = ConvertAttachmentText() command.update_all = True document_urls, = list(command.get_document_url()) document_url, document_id = document_urls - assert document_url == metro_bill_document.links.first().url - assert document_id == metro_bill_document.document.id - -''' -TO-DO: This should work, but it doesn't; the query is executed, but the result -never makes it into the database or to the ORM. When I run the resulting query -directly, the update is made. I'm not sure why this happens, but I need to move -on. -''' -# expected_full_text = 'test' -# documents = (doc for doc in [{'plain_text': expected_full_text, 'id': document_id}]) -# mocker.patch.object(command, 'convert_document_to_plaintext', return_value=documents) -# -# command.add_plain_text() -# -# metro_bill_document.refresh_from_db() -# assert metro_bill_document.full_text == expected_full_text + assert document_url == ocd_bill_document.links.first().url + assert document_id == ocd_bill_document.id + + expected_full_text = 'test' + documents = (doc for doc in [(expected_full_text, document_id)]) + mocker.patch.object(command, 'convert_document_to_plaintext', return_value=documents) + + command.add_plain_text() + + ocd_bill_document.refresh_from_db() + assert ocd_bill_document.extras['full_text'] == expected_full_text