Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup/Remove cli cmd and data migration script for agency address f… #1065

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
288 changes: 0 additions & 288 deletions src/onegov/people/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import sys

import click
import re
import transaction

from collections import OrderedDict
from bs4 import BeautifulSoup

from onegov.core.cli import command_group
from onegov.core.cli import abort
from onegov.people import Agency
from onegov.people.models import Person
from openpyxl import load_workbook
from openpyxl import Workbook
Expand Down Expand Up @@ -168,288 +165,3 @@ def _import(request: 'CoreRequest', app: 'Framework') -> None:
click.secho(f'Imported {count} person(s)', fg='green')

return _import


# Patterns for the legacy free-text ``address`` field. They are tried by
# ``parse_and_split_address_field`` in the order p2, p3, p4, p1, p6, p5.
p2 = re.compile(r'(.*), (.*)Postadresse: (.*), (.*)')
p3 = re.compile(r'(.*), (Postfach), (.*)')
p4 = re.compile(r'(.*), (.*), (.*)')
p1 = re.compile(r'(.*), (.*)')
p6 = re.compile(r'(.*)\n(.*)')
# street name and optional building number; ``[^\W\d_]`` matches any Unicode
# letter so that names such as 'Zürcherstrasse' are not cut off at the first
# non-ASCII character (the former ``[A-Za-z ]`` class did exactly that)
p5 = re.compile(r'((?:[^\W\d_]| )*) ?(\d+[a-z]?)?')


def parse_and_split_address_field(address: str) -> tuple[str, str, str, str]:
    """
    Parsing the `address` field to split into location address and code/city
    as well as postal address and code/city.

    Semicolons are removed first, then the patterns are tried from the most
    to the least specific one. Only the 'Postadresse:' form (p2) fills the
    location part, every other form only fills the postal part.

    The last pattern (p5) matches a prefix of the input: anything after the
    first character that is neither a letter, a space nor part of the
    building number is dropped.

    :param address: str
    :return: tuple: (location_address, location_code_city,
        postal_address, postal_code_city)
    """
    # sanitize address: drop semicolons together with a directly following
    # space, e.g. '4000 Basel; Postadresse: ..' -> '4000 BaselPostadresse: ..'
    address = address.replace('; ', '').replace(';', '')

    if not address:
        return '', '', '', ''

    # '<street>, <code city>Postadresse: <postal street>, <code city>'
    if m := p2.match(address):
        return m.group(1), m.group(2), m.group(3), m.group(4)

    # three comma separated parts; p3 (explicit 'Postfach') takes precedence
    # over the generic p4 because the two may split the input differently
    if m := (p3.match(address) or p4.match(address)):
        return '', '', m.group(1) + '\n' + m.group(2), m.group(3)

    # '<street>, <code city>'
    if m := p1.match(address):
        return '', '', m.group(1), m.group(2)

    # multi-line address: the last line is taken as code/city
    if p6.match(address):
        postal_addr, postal_pcc = address.rsplit('\n', 1)
        return '', '', postal_addr, postal_pcc

    # street with optional building number; group 1 is greedy and keeps the
    # space in front of the number, hence the plain concatenation
    if m := p5.match(address):
        return '', '', m.group(1) + (m.group(2) or ''), ''

    # default no match found (p5 can match the empty string, so this is
    # not expected to be reached)
    return '', '', '', ''


@cli.command('migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates onegov_agency people address field.

    Migrate data from onegov_agency table 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.


    Example::

        onegov-people --select /onegov_agency/bs migrate-people-address-field

        onegov-people --select /onegov_agency/bs migrate-people-address-field
        --dry-run

    """
    # NOTE: the docstring above is kept verbatim, click shows it as the
    # command's help text.

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        # Walk over all people and split each non-empty legacy address into
        # the four dedicated address columns.
        session = app.session()
        click.secho(
            "Migrate data from table 'people' column 'address' "
            "field to 'location_address', 'location_code_city', "
            "'postal_address' and 'postal_code_city ..",
            fg='yellow'
        )

        seen = 0
        migrated = 0
        for person in session.query(Person):
            seen += 1

            if person.address:
                parts = parse_and_split_address_field(person.address)
                person.location_address = parts[0]
                person.location_code_city = parts[1]
                person.postal_address = parts[2]
                person.postal_code_city = parts[3]
                migrated += 1

        if dry_run:
            # discard everything assigned above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        click.secho(
            f'Migrated all {migrated} address(es) of totally '
            f'{seen} people',
            fg='green'
        )

    return _migrate


@cli.command('onegov-migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def onegov_migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates people address field everywhere in onegov.

    Migrate data from 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.


    Example::

        onegov-people --select /onegov_town6/ebikon
        onegov-migrate-people-address-field

        onegov-people --select /onegov_org/risch
        onegov-migrate-people-address-field --dry-run

    """
    # NOTE: the docstring above is kept verbatim, click shows it as the
    # command's help text.

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        # Same procedure as for the agency specific command, but the url of
        # the processed request is printed first.
        click.secho(f'Request url: {request.url}..')
        session = app.session()
        click.secho(
            "Onegov migrate data from table 'people' column "
            "'address' field to 'location_address', "
            "'location_code_city', 'postal_address' and "
            "'postal_code_city ..",
            fg='yellow'
        )

        people_total = 0
        addresses_done = 0
        for person in session.query(Person):
            people_total += 1

            if person.address:
                location, location_city, postal, postal_city = (
                    parse_and_split_address_field(person.address)
                )
                person.location_address = location
                person.location_code_city = location_city
                person.postal_address = postal
                person.postal_code_city = postal_city
                addresses_done += 1

        if dry_run:
            # discard everything assigned above
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        click.secho(
            f'Migrated all {addresses_done} address(es) of totally '
            f'{people_total} people',
            fg='green'
        )

    return _migrate


re_postal_code_city_ch = re.compile(r'\d{4} .*')  # e.g. '1234 Mein Ort'
re_postal_code_city_de = re.compile(r'D-\d{5} .*')  # e.g. 'D-12345 Mein Ort'


def parse_agency_portrait_field_for_address(
    portrait: str
) -> tuple[str, str, str, str]:
    """
    Parsing the `portrait` field of agencies and extract address and
    code/city as well as location address and city if present.

    The html is converted to text lines. Every line starting with a postal
    code and city ('1234 Ort' or 'D-12345 Ort') is taken as code/city and
    the line before it as the address. If several such lines exist, the last
    one ends up as postal address and the one found before it as location
    address.

    :param portrait: html str
    :return: tuple: (location_addr, location_pcc ,postal_address,
        postal_code_city)
    """

    location_addr = ''
    location_pcc = ''
    postal_addr = ''
    postal_pcc = ''
    # index of the most recent code/city line, -1 as long as none was found
    plz_city_found_idx = -1

    soup = BeautifulSoup(portrait, "html.parser")
    # convert from html to text using soup
    lines = soup.get_text('\n').split('\n')
    for idx, line in enumerate(lines):
        m = (re_postal_code_city_ch.match(line)
             or re_postal_code_city_de.match(line))
        if not m:
            continue

        # Compare against the sentinel explicitly: a plain truthiness test
        # would treat a previous match on the very first line (index 0) as
        # "nothing found yet" and never promote it to the location address.
        if plz_city_found_idx >= 0:
            # assuming address initially found was location address
            location_addr = postal_addr
            location_pcc = postal_pcc

        postal_pcc = m.group(0)
        # if only code/city is present there is no street and number
        postal_addr = lines[idx - 1] if idx > 0 else ''

        # only extend postal address 'Postfach' with street/house number if
        # previous line is at least two lines away
        # Dorfstrasse 1, Postfach, 1234 Govikon
        if (
            'postfach' in postal_addr.lower()
            and (plz_city_found_idx + 2 < idx)
            and idx >= 2 and lines[idx - 2] != ''
        ):
            postal_addr = lines[idx - 2] + '\n' + postal_addr

        plz_city_found_idx = idx

    return location_addr, location_pcc, postal_addr, postal_pcc


@cli.command('extract-address-from-portrait-field')
@click.option('--dry-run/--no-dry-run', default=False)
def extract_address_from_portrait_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Extracts address from onegov_agency agency portrait field.

    Extracts address, postal code and city from onegov_agency table
    'agencies' column 'portrait'.

    Example::

        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field
        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field --dry-run
    """
    # NOTE: the docstring above is kept verbatim, click shows it as the
    # command's help text.

    def _extract(request: 'CoreRequest', app: 'Framework') -> None:
        # Walk over all agencies and fill the four address columns from the
        # html portrait of every agency that has one.
        session = app.session()
        click.secho("Extract address, postal code and city from table "
                    "'agencies' column 'portrait' to "
                    "'location_address', 'location_code_city', "
                    "'postal_address' and 'postal_code_city ..",
                    fg='yellow')
        extraction_count = 0
        total_count = 0
        for agency in session.query(Agency):
            total_count += 1

            if not agency.portrait:
                continue

            (
                agency.location_address,
                agency.location_code_city,
                agency.postal_address,
                agency.postal_code_city
            ) = parse_agency_portrait_field_for_address(agency.portrait)

            extraction_count += 1

        if dry_run:
            # discard everything assigned above and do not commit afterwards
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')
        else:
            transaction.commit()

        click.secho(f'Extracted {extraction_count} address(es) of totally '
                    f'{total_count} agencies', fg='green')

    return _extract
58 changes: 0 additions & 58 deletions src/onegov/people/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,61 +200,3 @@ def fix_agency_address_column(context: UpgradeContext) -> None:
context.operations.add_column('agencies', Column(
'address', Text, nullable=True
))


@upgrade_task(
    'Remove address columns from agency',
    requires='onegov.people:Fix agency address column'
)
def remove_address_columns_from_agency(context: UpgradeContext) -> None:
    # Drop each of the old address related columns of 'agencies' that is
    # still present; columns that are already gone are skipped.
    for obsolete in ('zip_code', 'city', 'address'):
        if context.has_column('agencies', obsolete):
            context.operations.drop_column('agencies', obsolete)


@upgrade_task('ogc-966 extend agency and person tables with more fields')
def extend_agency_and_person_with_more_fields(context: UpgradeContext) -> None:
    # Adds the new nullable text columns to 'agencies' and 'people', each
    # with '' as default value. Columns that already exist are left alone.

    def add_missing_text_columns(table: str, names: list[str]) -> None:
        for name in names:
            if context.has_column(table, name):
                continue
            context.add_column_with_defaults(
                table,
                Column(name, Text, nullable=True),
                default=lambda x: ''
            )

    # add columns to table 'agencies'
    add_missing_text_columns('agencies', [
        'email', 'phone', 'phone_direct', 'website',
        'location_address', 'location_code_city',
        'postal_address', 'postal_code_city',
        'opening_hours',
    ])

    context.session.flush()

    # add columns to table 'people'
    add_missing_text_columns('people', [
        'location_address', 'location_code_city',
        'postal_address', 'postal_code_city', 'website_2',
    ])


@upgrade_task('Add organisation columns to people')
def add_organisation_columns_to_people(context: UpgradeContext) -> None:
    # Add the two nullable organisation text columns to 'people' unless they
    # are already there.
    for name in ('organisation', 'sub_organisation'):
        if context.has_column('people', name):
            continue
        context.operations.add_column(
            'people', Column(name, Text, nullable=True)
        )
Loading