Skip to content

Commit

Permalink
Address SQL Server review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
olsen232 committed Mar 3, 2021
1 parent 6297cfe commit 1f66b62
Show file tree
Hide file tree
Showing 15 changed files with 276 additions and 364 deletions.
4 changes: 4 additions & 0 deletions sno/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ class BaseDataset(ImportSource):
"""
Common interface for all datasets - mainly Dataset2, but
there is also Dataset0 and Dataset1 used by `sno upgrade`.
A Dataset instance is immutable since it is a view of a particular git tree.
To get a new version of a dataset, commit the desired changes,
then instantiate a new Dataset instance that references the new git tree.
"""

# Constants that subclasses should generally define.
Expand Down
2 changes: 1 addition & 1 deletion sno/checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def create_workingcopy(ctx, discard_changes, wc_path):
wc_path = WorkingCopy.default_path(repo.workdir_path)

if wc_path != old_wc_path:
WorkingCopy.check_valid_creation_path(repo.workdir_path, wc_path)
WorkingCopy.check_valid_creation_path(wc_path, repo.workdir_path)

# Finished sanity checks - start work:
if old_wc and wc_path != old_wc_path:
Expand Down
2 changes: 1 addition & 1 deletion sno/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def clone(

if repo_path.exists() and any(repo_path.iterdir()):
raise InvalidOperation(f'"{repo_path}" isn\'t empty', param_hint="directory")
WorkingCopy.check_valid_creation_path(repo_path, wc_path)
WorkingCopy.check_valid_creation_path(wc_path, repo_path)

if not repo_path.exists():
repo_path.mkdir(parents=True)
Expand Down
8 changes: 8 additions & 0 deletions sno/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ def with_crs_id(self, crs_id):

@property
def crs_id(self):
"""
Returns the CRS ID as it is embedded in the GPKG header - before the WKB.
Note that datasets V2 zeroes this field before committing,
so will return zero when called on Geometry where it has been zeroed.
"""
wkb_offset, is_le, crs_id = parse_gpkg_geom(self)
return crs_id

Expand Down Expand Up @@ -296,6 +301,7 @@ def gpkg_geom_to_ogr(gpkg_geom, parse_crs=False):


def wkt_to_gpkg_geom(wkt, **kwargs):
"""Given a well-known-text string, returns a GPKG Geometry object."""
if wkt is None:
return None

Expand All @@ -304,6 +310,7 @@ def wkt_to_gpkg_geom(wkt, **kwargs):


def wkb_to_gpkg_geom(wkb, **kwargs):
"""Given a well-known-binary bytestring, returns a GPKG Geometry object."""
if wkb is None:
return None

Expand All @@ -312,6 +319,7 @@ def wkb_to_gpkg_geom(wkb, **kwargs):


def hex_wkb_to_gpkg_geom(hex_wkb, **kwargs):
"""Given a hex-encoded well-known-binary bytestring, returns a GPKG Geometry object."""
if hex_wkb is None:
return None

Expand Down
2 changes: 1 addition & 1 deletion sno/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def init(

if repo_path.exists() and any(repo_path.iterdir()):
raise InvalidOperation(f'"{repo_path}" isn\'t empty', param_hint="directory")
WorkingCopy.check_valid_creation_path(repo_path, wc_path)
WorkingCopy.check_valid_creation_path(wc_path, repo_path)

if not repo_path.exists():
repo_path.mkdir(parents=True)
Expand Down
4 changes: 2 additions & 2 deletions sno/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def init_repository(
repo_root_path = repo_root_path.resolve()
cls._ensure_exists_and_empty(repo_root_path)
if not bare:
WorkingCopy.check_valid_creation_path(repo_root_path, wc_path)
WorkingCopy.check_valid_creation_path(wc_path, repo_root_path)

extra_args = []
if initial_branch is not None:
Expand Down Expand Up @@ -224,7 +224,7 @@ def clone_repository(
repo_root_path = repo_root_path.resolve()
cls._ensure_exists_and_empty(repo_root_path)
if not bare:
WorkingCopy.check_valid_creation_path(repo_root_path, wc_path)
WorkingCopy.check_valid_creation_path(wc_path, repo_root_path)

if bare:
sno_repo = cls._create_with_git_command(
Expand Down
21 changes: 11 additions & 10 deletions sno/sqlalchemy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re
import socket
from urllib.parse import urlsplit, urlunsplit
from urllib.parse import urlsplit, urlunsplit, urlencode, parse_qs


import sqlalchemy
Expand Down Expand Up @@ -110,7 +110,7 @@ def _on_connect(psycopg2_conn, connection_record):

CANONICAL_SQL_SERVER_SCHEME = "mssql"
INTERNAL_SQL_SERVER_SCHEME = "mssql+pyodbc"
SQL_SERVER_DRIVER_LIB = "ODBC+Driver+17+for+SQL+Server"
SQL_SERVER_DRIVER_LIB = "ODBC Driver 17 for SQL Server"


def sqlserver_engine(msurl):
Expand All @@ -122,7 +122,7 @@ def sqlserver_engine(msurl):
url_netloc = re.sub(r"\blocalhost\b", _replace_with_localhost, url.netloc)

url_query = _append_to_query(
url.query, {"driver": SQL_SERVER_DRIVER_LIB, "Application+Name": "sno"}
url.query, {"driver": SQL_SERVER_DRIVER_LIB, "Application Name": "sno"}
)

msurl = urlunsplit(
Expand All @@ -137,14 +137,15 @@ def _replace_with_localhost(*args, **kwargs):
return socket.gethostbyname("localhost")


def _append_query_to_url(uri, query_dict):
def _append_query_to_url(uri, new_query_dict):
url = urlsplit(uri)
url_query = _append_to_query(url.query, query_dict)
url_query = _append_to_query(url.query, new_query_dict)
return urlunsplit([url.scheme, url.netloc, url.path, url_query, ""])


def _append_to_query(url_query, query_dict):
for key, value in query_dict.items():
if key not in url_query:
url_query = "&".join(filter(None, [url_query, f"{key}={value}"]))
return url_query
def _append_to_query(existing_query, new_query_dict):
query_dict = parse_qs(existing_query)
for key, value in new_query_dict.items():
if key not in query_dict:
query_dict[key] = value
return urlencode(query_dict)
70 changes: 45 additions & 25 deletions sno/working_copy/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from enum import Enum, auto
import contextlib
import functools
import itertools
import logging
import time
from enum import Enum, auto


import click
import pygit2
Expand Down Expand Up @@ -48,7 +47,8 @@ def from_path(cls, path, allow_invalid=False):
f"Unrecognised working copy type: {path}\n"
"Try one of:\n"
" PATH.gpkg\n"
" postgresql://[HOST]/DBNAME/SCHEMA\n"
" postgresql://[HOST]/DBNAME/DBSCHEMA\n"
" mssql://[HOST]/DBNAME/DBSCHEMA"
)

@property
Expand Down Expand Up @@ -100,15 +100,10 @@ class WorkingCopy:
self.sno_tables - sqlalchemy Table definitions for sno_track and sno_state tables.
"""

SNO_WORKINGCOPY_PATH = "sno.workingcopy.path"
WORKING_COPY_TYPE_NAME = "AbstractBaseClass"
URI_SCHEME = None

@property
@functools.lru_cache(maxsize=1)
def DB_SCHEMA(self):
"""Escaped, dialect-specific name of the database-schema owned by this working copy (if any)."""
if self.db_schema is None:
raise RuntimeError("No schema to escape.")
return self.preparer.format_schema(self.db_schema)
SNO_WORKINGCOPY_PATH = "sno.workingcopy.path"

@property
@functools.lru_cache(maxsize=1)
Expand Down Expand Up @@ -261,28 +256,33 @@ def write_config(cls, repo, path=None, bare=False):
repo_cfg[path_key] = str(path)

@classmethod
def check_valid_creation_path(cls, workdir_path, wc_path):
def subclass_from_path(cls, wc_path):
wct = WorkingCopyType.from_path(wc_path)
if wct.class_ is cls:
raise RuntimeError(
f"No subclass found - don't call subclass_from_path on concrete implementation {cls}."
)
return wct.class_

@classmethod
def check_valid_creation_path(cls, wc_path, workdir_path=None):
"""
Given a user-supplied string describing where to put the working copy, ensures it is a valid location,
and nothing already exists there that prevents us from creating it. Raises InvalidOperation if it is not.
Doesn't check if we have permissions to create a working copy there.
"""
if not wc_path:
wc_path = cls.default_path(workdir_path)
WorkingCopyType.from_path(wc_path).class_.check_valid_creation_path(
workdir_path, wc_path
)
cls.subclass_from_path(wc_path).check_valid_creation_path(wc_path, workdir_path)

@classmethod
def check_valid_path(cls, workdir_path, wc_path):
def check_valid_path(cls, wc_path, workdir_path=None):
"""
Given a user-supplied string describing where to put the working copy, ensures it is a valid location,
and nothing already exists there that prevents us from creating it. Raises InvalidOperation if it is not.
Doesn't check if we have permissions to create a working copy there.
"""
WorkingCopyType.from_path(wc_path).class_.check_valid_path(
workdir_path, wc_path
)
cls.subclass_from_path(wc_path).check_valid_path(wc_path, workdir_path)

def check_valid_state(self):
if self.is_created():
Expand All @@ -304,19 +304,39 @@ def default_path(cls, workdir_path):
return f"{stem}.gpkg"

@classmethod
def normalise_path(cls, repo, path):
def normalise_path(cls, repo, wc_path):
"""If the path is in a non-standard form, normalise it to the equivalent standard form."""
return WorkingCopyType.from_path(path).class_.normalise_path(repo, path)
return cls.subclass_from_path(wc_path).normalise_path(repo, wc_path)

@contextlib.contextmanager
def session(self, bulk=0):
"""
Context manager for DB sessions, yields a session object inside a transaction
Calling again yields the _same_ session, the transaction/etc only happen in the outer one.
Context manager for GeoPackage DB sessions, yields a connection object inside a transaction
@bulk controls bulk-loading operating mode - subject to change. See ./gpkg.py
Calling again yields the _same_ session, the transaction/etc only happen in the outer one.
"""
raise NotImplementedError()
L = logging.getLogger(f"{self.__class__.__qualname__}.session")

if hasattr(self, "_session"):
# Inner call - reuse existing session.
L.debug("session: existing...")
yield self._session
L.debug("session: existing/done")
return

L.debug("session: new...")
self._session = self.sessionmaker()
try:
# TODO - use tidier syntax for opening transactions from sqlalchemy.
yield self._session
self._session.commit()
except Exception:
self._session.rollback()
raise
finally:
self._session.close()
del self._session
L.debug("session: new/done")

def is_created(self):
"""
Expand Down
101 changes: 101 additions & 0 deletions sno/working_copy/db_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import functools
import re
from urllib.parse import urlsplit, urlunsplit

import click

from .base import WorkingCopy
from sno.exceptions import InvalidOperation


class DatabaseServer_WorkingCopy(WorkingCopy):
"""Functionality common to working copies that connect to a database server."""

# Subclasses should override with the DB server URI scheme eg "postgresql".
URI_SCHEME = None

@classmethod
def check_valid_creation_path(cls, wc_path, workdir_path=None):
cls.check_valid_path(wc_path, workdir_path)

working_copy = cls(None, wc_path)
if working_copy.has_data():
db_schema = working_copy.db_schema
container_text = f"schema '{db_schema}'" if db_schema else "working copy"
raise InvalidOperation(
f"Error creating {cls.WORKING_COPY_TYPE_NAME} working copy at {wc_path} - "
f"non-empty {container_text} already exists"
)

@classmethod
def check_valid_path(cls, wc_path, workdir_path=None):
cls.check_valid_db_uri(wc_path, workdir_path)

@classmethod
def normalise_path(cls, repo, wc_path):
return wc_path

@classmethod
def check_valid_db_uri(cls, db_uri, workdir_path=None):
"""
For working copies that connect to a database - checks the given URI is in the required form:
>>> URI_SCHEME::[HOST]/DBNAME/DBSCHEMA
"""
url = urlsplit(db_uri)

if url.scheme != cls.URI_SCHEME:
raise click.UsageError(
f"Invalid {cls.WORKING_COPY_TYPE_NAME} URI - "
f"Expecting URI in form: {cls.URI_SCHEME}://[HOST]/DBNAME/DBSCHEMA"
)

url_path = url.path
path_parts = url_path[1:].split("/", 3) if url_path else []

suggestion_message = ""
if len(path_parts) == 1 and workdir_path is not None:
suggested_path = f"/{path_parts[0]}/{cls.default_schema(workdir_path)}"
suggested_uri = urlunsplit(
[url.scheme, url.netloc, suggested_path, url.query, ""]
)
suggestion_message = f"\nFor example: {suggested_uri}"

if len(path_parts) != 2:
raise click.UsageError(
f"Invalid {cls.WORKING_COPY_TYPE_NAME} URI - URI requires both database name and database schema:\n"
f"Expecting URI in form: {cls.URI_SCHEME}://[HOST]/DBNAME/DBSCHEMA"
+ suggestion_message
)

@classmethod
def _separate_db_schema(cls, db_uri):
"""
Removes the DBSCHEMA part off the end of a uri in the form URI_SCHEME::[HOST]/DBNAME/DBSCHEMA -
and returns the URI and the DBSCHEMA separately.
Useful since generally, URI_SCHEME::[HOST]/DBNAME is what is needed to connect to the database,
and then DBSCHEMA must be specified in each query.
"""
url = urlsplit(db_uri)
url_path = url.path
path_parts = url_path[1:].split("/", 3) if url_path else []
assert len(path_parts) == 2
url_path = "/" + path_parts[0]
db_schema = path_parts[1]
return urlunsplit([url.scheme, url.netloc, url_path, url.query, ""]), db_schema

@classmethod
def default_db_schema(cls, workdir_path):
"""Returns a suitable default database schema - named after the folder this Sno repo is in."""
stem = workdir_path.stem
schema = re.sub("[^a-z0-9]+", "_", stem.lower()) + "_sno"
if schema[0].isdigit():
schema = "_" + schema
return schema

@property
@functools.lru_cache(maxsize=1)
def DB_SCHEMA(self):
"""Escaped, dialect-specific name of the database-schema owned by this working copy (if any)."""
if self.db_schema is None:
raise RuntimeError("No schema to escape.")
return self.preparer.format_schema(self.db_schema)
Loading

0 comments on commit 1f66b62

Please sign in to comment.