Skip to content

Commit

Permalink
People: Cleanup/Remove cli cmd and data migration script for agency a…
Browse files Browse the repository at this point in the history
…ddress parsed from portrait field

TYPE: Feature
LINK: ogc-1053
  • Loading branch information
Tschuppi81 committed Aug 16, 2024
1 parent cc5f3c0 commit e5c3c3f
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 455 deletions.
288 changes: 0 additions & 288 deletions src/onegov/people/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import sys

import click
import re
import transaction

from collections import OrderedDict
from bs4 import BeautifulSoup

from onegov.core.cli import command_group
from onegov.core.cli import abort
from onegov.people import Agency
from onegov.people.models import Person
from openpyxl import load_workbook
from openpyxl import Workbook
Expand Down Expand Up @@ -168,288 +165,3 @@ def _import(request: 'CoreRequest', app: 'Framework') -> None:
click.secho(f'Imported {count} person(s)', fg='green')

return _import


# Address patterns used by ``parse_and_split_address_field``. They are tried
# in the order p2, p3, p4, p1, p6, p5 (most specific first).
p2 = re.compile(r'(.*), (.*)Postadresse: (.*), (.*)')
p3 = re.compile(r'(.*), (Postfach), (.*)')
p4 = re.compile(r'(.*), (.*), (.*)')
p1 = re.compile(r'(.*), (.*)')
p6 = re.compile(r'(.*)\n(.*)')
# Street name and optional building number. The street part accepts any
# Unicode letter (``[^\W\d_]``) plus space, dot, apostrophe and hyphen, so
# names such as 'Zürichstrasse' or 'St. Alban-Vorstadt' are not cut off at the
# first non-ASCII or punctuation character.
p5 = re.compile(r"((?:[^\W\d_]|[ .'\-])*) ?(\d+[a-z]?)?")


def parse_and_split_address_field(address: str) -> tuple[str, str, str, str]:
    """
    Parsing the `address` field to split into location address and code/city
    as well as postal address and code/city.

    Semicolons (with or without a following space) are stripped before
    matching. The patterns are then tried from most to least specific:

    * ``<loc>, <loc city>Postadresse: <postal>, <postal city>`` fills all
      four fields,
    * ``<a>, <b>, <city>`` stores ``<a>\\n<b>`` as postal address,
    * ``<a>, <city>`` and ``<a>\\n<city>`` store a postal address and city,
    * a bare street name with optional building number is stored as postal
      address only.

    :param address: str
    :return: tuple: (location_address, location_code_city,
        postal_address, postal_code_city); fields that could not be
        determined are empty strings
    """
    location_addr = location_pcc = postal_addr = postal_pcc = ''

    # sanitize address ('; ' first, so no stray space is left behind)
    address = address.replace('; ', '').replace(';', '')

    if not address:
        return location_addr, location_pcc, postal_addr, postal_pcc

    if m := p2.match(address):
        location_addr, location_pcc, postal_addr, postal_pcc = m.groups()
    elif m := (p3.match(address) or p4.match(address)):
        # both three-part patterns are stored the same way
        postal_addr = f'{m.group(1)}\n{m.group(2)}'
        postal_pcc = m.group(3)
    elif m := p1.match(address):
        postal_addr, postal_pcc = m.groups()
    elif p6.match(address):
        # the last line is the code/city, everything before is the address
        postal_addr, postal_pcc = address.rsplit('\n', 1)
    elif m := p5.match(address):
        # p5 may match an empty prefix, in which case all fields stay empty
        postal_addr = m.group(1) + (m.group(2) or '')

    return location_addr, location_pcc, postal_addr, postal_pcc


@cli.command('migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates onegov_agency people address field.
    Migrate data from onegov_agency table 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.
    Example::
        onegov-people --select /onegov_agency/bs migrate-people-address-field
        onegov-people --select /onegov_agency/bs migrate-people-address-field
        --dry-run
    """

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        # Split every non-empty `address` into the four dedicated fields.
        db = app.session()
        click.secho("Migrate data from table 'people' column 'address' "
                    "field to 'location_address', 'location_code_city', "
                    "'postal_address' and 'postal_code_city ..",
                    fg='yellow')

        seen = 0
        migrated = 0
        for seen, person in enumerate(db.query(Person), start=1):
            if person.address:
                location, location_city, postal, postal_city = (
                    parse_and_split_address_field(person.address)
                )
                person.location_address = location
                person.location_code_city = location_city
                person.postal_address = postal
                person.postal_code_city = postal_city
                migrated += 1

        # a dry run discards the changes made above
        if dry_run:
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        click.secho(f'Migrated all {migrated} address(es) of totally '
                    f'{seen} people', fg='green')

    return _migrate


@cli.command('onegov-migrate-people-address-field')
@click.option('--dry-run/--no-dry-run', default=False)
def onegov_migrate_people_address_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Migrates people address field everywhere in onegov.
    Migrate data from 'people' column 'address' field to
    'location_address', 'location_code_city', 'postal_address' and
    'postal_code_city' fields.
    Example::
        onegov-people --select /onegov_town6/ebikon
        onegov-migrate-people-address-field
        onegov-people --select /onegov_org/risch
        onegov-migrate-people-address-field --dry-run
    """

    def _migrate(request: 'CoreRequest', app: 'Framework') -> None:
        # Split every non-empty `address` into the four dedicated fields.
        click.secho(f'Request url: {request.url}..')
        db = app.session()
        click.secho("Onegov migrate data from table 'people' column "
                    "'address' field to 'location_address', "
                    "'location_code_city', 'postal_address' and "
                    "'postal_code_city ..",
                    fg='yellow')

        seen = 0
        migrated = 0
        for seen, person in enumerate(db.query(Person), start=1):
            if person.address:
                location, location_city, postal, postal_city = (
                    parse_and_split_address_field(person.address)
                )
                person.location_address = location
                person.location_code_city = location_city
                person.postal_address = postal
                person.postal_code_city = postal_city
                migrated += 1

        # a dry run discards the changes made above
        if dry_run:
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        click.secho(f'Migrated all {migrated} address(es) of totally '
                    f'{seen} people', fg='green')

    return _migrate


re_postal_code_city_ch = re.compile(r'\d{4} .*')  # e.g. '1234 Mein Ort'
re_postal_code_city_de = re.compile(r'D-\d{5} .*')  # e.g. 'D-12345 Mein Ort'


def parse_agency_portrait_field_for_address(
    portrait: str
) -> tuple[str, str, str, str]:
    """
    Parsing the `portrait` field of agencies and extract address and
    code/city as well as location address and city if present.

    The html is converted to text and scanned line by line for a postal
    code/city line. The line preceding such a line is taken as the address.
    Whenever a further code/city line is found, the previously found pair
    is moved to the location fields and the new pair becomes the postal
    address.

    :param portrait: html str
    :return: tuple: (location_addr, location_pcc ,postal_address,
        postal_code_city); fields that were not found are empty strings
    """

    location_addr = ''
    location_pcc = ''
    postal_addr = ''
    postal_pcc = ''
    # line index of the previously found code/city line, -1 if none yet
    plz_city_found_idx = -1

    soup = BeautifulSoup(portrait, "html.parser")
    # convert from html to text using soup
    lines = soup.get_text('\n').split('\n')

    for idx, line in enumerate(lines):
        m = (re_postal_code_city_ch.match(line)
             or re_postal_code_city_de.match(line))
        if not m:
            continue

        # Compare against the sentinel explicitly: a plain truthiness test
        # treats -1 as "found" and a genuine match at line 0 as "not found".
        if plz_city_found_idx >= 0:
            # assuming address initially found was location address
            location_addr = postal_addr
            location_pcc = postal_pcc

        postal_pcc = m.group(0)
        # if only code/city is given there is no street and number
        postal_addr = lines[idx - 1] if idx > 0 else ''

        # only extend postal address 'Postfach' with street/house number if
        # previous line is at least two lines away
        # Dorfstrasse 1, Postfach, 1234 Govikon
        if (
            'postfach' in postal_addr.lower()
            and (plz_city_found_idx + 2 < idx)
            and idx >= 2 and lines[idx - 2] != ''
        ):
            postal_addr = lines[idx - 2] + '\n' + postal_addr

        plz_city_found_idx = idx

    return location_addr, location_pcc, postal_addr, postal_pcc


@cli.command('extract-address-from-portrait-field')
@click.option('--dry-run/--no-dry-run', default=False)
def extract_address_from_portrait_field(
    dry_run: bool
) -> 'Callable[[CoreRequest, Framework], None]':
    """ Extracts address from onegov_agency agency portrait field.
    Extracts address, postal code and city from onegov_agency table
    'agencies' column 'portrait'.
    Example::
        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field
        onegov-people --select /onegov_agency/bs
        extract-address-from-portrait-field --dry-run
    """

    def _extract(request: 'CoreRequest', app: 'Framework') -> None:
        # Fill the four address fields of every agency with a portrait.
        db = app.session()
        click.secho("Extract address, postal code and city from table "
                    "'agencies' column 'portrait' to "
                    "'location_address', 'location_code_city', "
                    "'postal_address' and 'postal_code_city ..",
                    fg='yellow')

        seen = 0
        extracted = 0
        for seen, agency in enumerate(db.query(Agency), start=1):
            if agency.portrait:
                location, location_city, postal, postal_city = (
                    parse_agency_portrait_field_for_address(agency.portrait)
                )
                agency.location_address = location
                agency.location_code_city = location_city
                agency.postal_address = postal
                agency.postal_code_city = postal_city
                extracted += 1

        # a dry run discards the changes made above
        if dry_run:
            transaction.abort()
            click.secho('Aborting transaction', fg='yellow')

        # the commit is issued in either case, as in the original flow
        transaction.commit()
        click.secho(f'Extracted {extracted} address(es) of totally '
                    f'{seen} agencies', fg='green')

    return _extract
58 changes: 0 additions & 58 deletions src/onegov/people/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,61 +200,3 @@ def fix_agency_address_column(context: UpgradeContext) -> None:
context.operations.add_column('agencies', Column(
'address', Text, nullable=True
))


@upgrade_task(
    'Remove address columns from agency',
    requires='onegov.people:Fix agency address column'
)
def remove_address_columns_from_agency(context: UpgradeContext) -> None:
    """ Drops the 'zip_code', 'city' and 'address' columns from the
    'agencies' table, skipping any column that does not exist.
    """
    for obsolete in ('zip_code', 'city', 'address'):
        if context.has_column('agencies', obsolete):
            context.operations.drop_column('agencies', obsolete)


@upgrade_task('ogc-966 extend agency and person tables with more fields')
def extend_agency_and_person_with_more_fields(context: UpgradeContext) -> None:
    """ Adds the missing nullable text columns to the 'agencies' and
    'people' tables, using an empty string as default value.
    """

    def add_missing_text_columns(table: str, names: tuple[str, ...]) -> None:
        # only columns that do not exist yet are added
        for name in names:
            if context.has_column(table, name):
                continue
            context.add_column_with_defaults(
                table,
                Column(name, Text, nullable=True),
                default=lambda x: ''
            )

    # add columns to table 'agencies'
    add_missing_text_columns('agencies', (
        'email', 'phone', 'phone_direct', 'website',
        'location_address', 'location_code_city',
        'postal_address', 'postal_code_city',
        'opening_hours',
    ))

    context.session.flush()

    # add columns to table 'people'
    add_missing_text_columns('people', (
        'location_address', 'location_code_city',
        'postal_address', 'postal_code_city', 'website_2',
    ))


@upgrade_task('Add organisation columns to people')
def add_organisation_columns_to_people(context: UpgradeContext) -> None:
    """ Adds the nullable text columns 'organisation' and
    'sub_organisation' to the 'people' table if they are missing.
    """
    for name in ('organisation', 'sub_organisation'):
        if context.has_column('people', name):
            continue
        context.operations.add_column('people', Column(
            name, Text, nullable=True
        ))
Loading

0 comments on commit e5c3c3f

Please sign in to comment.