Skip to content

Commit

Permalink
GKS hackathon changes tracking (#28)
Browse files Browse the repository at this point in the history
* Add dependencies to pyproject, and optional dev

* Delete .requirements.txt

* Add latest ruff as dev dependency

* Improve README

* Add ci.yaml github action. Add ruff cfg to pyproject

* Fix easy lint issues

* Format code

* Add `pre-commit` script

* Type hints and comments

* Add templates

* Make devready install .[dev]

* Delete all json/rst files compiled by tests. They aren't used.

---------

Co-authored-by: Liam Mulhall <liammulh@gmail.com>
Co-authored-by: Terry ONeill <toneill@broadinstitute.org>
Co-authored-by: Liam Mulhall <liammulh@stanford.edu>
Co-authored-by: Kori Kuzma <korikuzma@gmail.com>
  • Loading branch information
5 people authored Nov 7, 2024
1 parent 2ee0928 commit e019b80
Show file tree
Hide file tree
Showing 58 changed files with 450 additions and 2,518 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
name: CI

on:
push:
branches:
- main
pull_request:

jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive

- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: '3.12'
architecture: 'x64'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install '.[dev]'
- name: Run lint + format
run: |
ruff check src
- name: Run tests
run: pytest
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,5 @@ dmypy.json
Pipfile*

# IDEs
.vscode
.vscode
.idea
2 changes: 0 additions & 2 deletions .requirements.txt

This file was deleted.

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ venv/%:
#=> develop: install package in develop mode
.PHONY: develop setup
develop setup:
pip install -e .
pip install -e '.[dev]'

#=> devready: create venv, install prerequisites, install pkg in develop mode
.PHONY: devready
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# gks-metaschema


Tools and scripts for parsing the GA4GH Genomic Knowledge Standards (GKS) metaschemas.
The metaschema processor (MSP) converts
[JSON Schema Version 2020-12](https://json-schema.org/draft/2020-12/schema) in YAML to
Expand Down Expand Up @@ -28,6 +29,12 @@ environment.
make devready
source venv/3.12/bin/activate


Set up the `pre-commit` hook

cp ./scripts/pre-commit ./.git/hooks/


### Testing

To run the tests:
Expand Down
39 changes: 35 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ keywords = [
"variation"
]
requires-python = ">=3.12"
dynamic = ["version", "dependencies"]
dependencies = [
"pyyaml",
"Jinja2"
]
dynamic = ["version"]

[project.optional-dependencies]
dev = [
"pytest",
"ruff==0.7.2"
]

[project.urls]
Homepage = "https://github.com/ga4gh/gks-metaschema"
Expand All @@ -37,9 +47,6 @@ Changelog = "https://github.com/ga4gh/gks-metaschema/releases"
Source = "https://github.com/ga4gh/gks-metaschema"
"Bug Tracker" = "https://github.com/ga4gh/gks-metaschema/issues"

[tool.setuptools.dynamic]
dependencies = {file = [".requirements.txt"]}

[tool.setuptools_scm]

[project.scripts]
Expand All @@ -53,3 +60,27 @@ source2classes = "ga4gh.gks.metaschema.scripts.source2classes:cli"
[build-system]
requires = ["setuptools>=65.3", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"


[tool.ruff]
line-length = 120
target-version = "py312"

[tool.ruff.lint]
select = [
"C",
"F",
"I",
"E",
"W"
]
fixable = ["ALL"]
ignore = ["C901"]

[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
16 changes: 16 additions & 0 deletions scripts/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# This pre-commit script should be placed in the .git/hooks/ directory.
# It runs code quality checks prior to a commit.


# Get and change to the root of the repo
project_root_dir=`git rev-parse --show-toplevel`
cd "$project_root_dir" || exit 1

# Immediately exit if there's an error.
set -e

ruff check # Run the linter.
ruff check --select I --fix # Sort imports.
ruff format # Run the formatter.
pytest # Run the test suite.
7 changes: 5 additions & 2 deletions src/ga4gh/gks/metaschema/scripts/jsy2js.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#!/usr/bin/env python3

import yaml
import json
import sys

import yaml


def cli():
yaml_schema = yaml.load(sys.stdin, Loader=yaml.SafeLoader)
json.dump(yaml_schema, sys.stdout, indent=3)


if __name__ == "__main__":
cli()
cli()
9 changes: 6 additions & 3 deletions src/ga4gh/gks/metaschema/scripts/source2classes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python3
import argparse
from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor
from pathlib import Path

from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor

parser = argparse.ArgumentParser()
parser.add_argument("infile")

Expand All @@ -13,10 +14,12 @@ def main(proc):
continue
print(cls)


def cli():
args = parser.parse_args()
p = YamlSchemaProcessor(Path(args.infile))
main(p)

if __name__ == '__main__':
cli()

if __name__ == "__main__":
cli()
5 changes: 4 additions & 1 deletion src/ga4gh/gks/metaschema/scripts/source2jsy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

import pathlib
import sys

from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor


def cli():
source_file = pathlib.Path(sys.argv[1])
p = YamlSchemaProcessor(source_file)
p.js_yaml_dump(sys.stdout)


if __name__ == "__main__":
cli()
cli()
5 changes: 4 additions & 1 deletion src/ga4gh/gks/metaschema/scripts/source2mergedjsy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

import pathlib
import sys

from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor


def cli():
source_file = pathlib.Path(sys.argv[1])
p = YamlSchemaProcessor(source_file)
p.merge_imported()
p.js_yaml_dump(sys.stdout)


if __name__ == "__main__":
cli()
cli()
71 changes: 43 additions & 28 deletions src/ga4gh/gks/metaschema/scripts/source2splitjs.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,63 @@
#!/usr/bin/env python3

from pathlib import Path
from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor
import argparse
import re
import os
import copy
import json
import os
import re
from pathlib import Path

from ga4gh.gks.metaschema.tools.source_proc import YamlSchemaProcessor

parser = argparse.ArgumentParser()
parser.add_argument("infile")


def _redirect_refs(obj, dest_path, root_proc, mode):
frag_re = re.compile(r'(/\$defs|definitions)/(\w+)')
def _redirect_refs(obj: dict | list, dest_path: Path, root_proc: YamlSchemaProcessor, mode: str) -> dict | list:
"""Process the list of references and returns the list of classes
:param obj: list of schema objects
:param dest_path: destination output path
:param root_proc: the root YamlSchemaProcessor
:param mode: output mode of "json" or "yaml"
"""
frag_re = re.compile(r"(/\$defs|definitions)/(\w+)")
if isinstance(obj, list):
return [_redirect_refs(x, dest_path, root_proc, mode) for x in obj]
elif isinstance(obj, dict):
for k, v in obj.items():
if k == '$ref':
parts = v.split('#')
if k == "$ref":
parts = v.split("#")
if len(parts) == 2:
ref, fragment = parts
elif len(parts) == 1:
ref = parts[0]
fragment = ''
fragment = ""
else:
raise ValueError(f'Expected only one fragment operator.')
raise ValueError("Expected only one fragment operator.")
if fragment:
m = frag_re.match(fragment)
assert m is not None
ref_class = m.group(2)
else:
ref_class = ref.split('/')[-1].split('.')[0]
ref_class = ref.split("/")[-1].split(".")[0]

# Test if reference is for internal or external object
# and retrieve appropriate processor for export path
if ref == '':
if ref == "":
proc = root_proc
else:
proc = None
for _, other in root_proc.imports.items():
if ref_class in other.defs:
proc = other
if proc is None:
raise ValueError(f'Could not find {ref_class} in processors')
raise ValueError(f"Could not find {ref_class} in processors")
# if reference is protected for the class being processed, return only fragment
if ref == '' and proc.class_is_protected(ref_class):
containing_class = proc.raw_defs[ref_class]['protectedClassOf']
if ref == "" and proc.class_is_protected(ref_class):
containing_class = proc.raw_defs[ref_class]["protectedClassOf"]
if containing_class == dest_path.name:
obj[k] = f'#{fragment}'
obj[k] = f"#{fragment}"
return obj
obj[k] = proc.get_class_abs_path(ref_class, mode)
else:
Expand All @@ -59,26 +67,31 @@ def _redirect_refs(obj, dest_path, root_proc, mode):
return obj


def split_defs_to_js(root_proc, mode='json'):
if mode == 'json':
def split_defs_to_js(root_proc: YamlSchemaProcessor, mode: str = "json") -> None:
"""Splits the classes defined in the schema into json files.
:param root_proc: root YamlSchemaProcessor
:param mode: str, defaults to "json"
"""
if mode == "json":
fp = root_proc.json_fp
elif mode == 'yaml':
elif mode == "yaml":
fp = root_proc.yaml_fp
else:
raise ValueError('mode must be json or yaml')
raise ValueError("mode must be json or yaml")
os.makedirs(fp, exist_ok=True)
kw = root_proc.schema_def_keyword
for cls in root_proc.for_js[kw].keys():
if root_proc.class_is_protected(cls):
continue
class_def = copy.deepcopy(root_proc.for_js[kw][cls])
target_path = fp / f'{cls}'
target_path = fp / f"{cls}"
out_doc = copy.deepcopy(root_proc.for_js)
if cls in root_proc.has_protected_members:
def_dict = dict()
def_dict = {}
keep = False
for protected_cls in root_proc.has_protected_members[cls]:
if root_proc.raw_defs[protected_cls]['protectedClassOf'] == cls:
if root_proc.raw_defs[protected_cls]["protectedClassOf"] == cls:
def_dict[protected_cls] = copy.deepcopy(root_proc.defs[protected_cls])
keep = True
if keep:
Expand All @@ -89,15 +102,17 @@ def split_defs_to_js(root_proc, mode='json'):
out_doc.pop(kw, None)
class_def = _redirect_refs(class_def, target_path, root_proc, mode)
out_doc.update(class_def)
out_doc['title'] = cls
out_doc['$id'] = root_proc.get_class_uri(cls, mode)
with open(target_path, 'w') as f:
out_doc["title"] = cls
out_doc["$id"] = root_proc.get_class_uri(cls, mode)
with open(target_path, "w") as f:
json.dump(out_doc, f, indent=3, sort_keys=False)


def cli():
args = parser.parse_args()
p = YamlSchemaProcessor(Path(args.infile))
split_defs_to_js(p)

if __name__ == '__main__':
cli()

if __name__ == "__main__":
cli()
Loading

0 comments on commit e019b80

Please sign in to comment.