Skip to content

Commit

Permalink
UPD start refactoring tests, ADD modification test skeleton
Browse files Browse the repository at this point in the history
  • Loading branch information
eboileau committed Jul 17, 2024
1 parent 0afe3ef commit 9f13aa6
Show file tree
Hide file tree
Showing 9 changed files with 276 additions and 322 deletions.
195 changes: 7 additions & 188 deletions server/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,17 @@
from sqlalchemy.orm import sessionmaker

from scimodom.database.database import init, Base
from scimodom.database.models import (
RNAType,
Modomics,
Taxonomy,
Taxa,
Assembly,
AssemblyVersion,
Annotation,
AnnotationVersion,
DetectionMethod,
)
from scimodom.utils.specifications import SPECS_EUF

# data path: named tuple with the fields LOC, ASSEMBLY_PATH, ANNOTATION_PATH
# and META_PATH (presumably filled in by the data_path fixture -- TODO confirm)
DataPath = namedtuple("DataPath", "LOC ASSEMBLY_PATH ANNOTATION_PATH META_PATH")


@pytest.fixture()
def EUF_specs():
    """Expose the EUF format name, its most recent version and that version's specs.

    Returns
    -------
    tuple
        ``(format, version, specs)`` where ``version`` is the last entry of
        ``SPECS_EUF["versions"]`` and ``specs`` is ``SPECS_EUF[version]``.
    """
    euf_format, latest = SPECS_EUF["format"], SPECS_EUF["versions"][-1]
    # columns must match "ORM Data/Dataset model"
    return euf_format, latest, SPECS_EUF[latest]
# Modules that pytest loads as plugins; the fixtures they define become
# available to the tests without being imported explicitly.
pytest_plugins = [
"fixtures.setup",
"fixtures.selection",
"fixtures.project",
"fixtures.dataset",
]


@pytest.fixture()
Expand All @@ -44,176 +32,7 @@ def Session():
session().close()


@pytest.fixture()
def setup():
    """Build the base records that the database tests start from.

    No session is touched here: the instances are only created and handed
    back, and the caller decides when to add them.

    Returns
    -------
    list
        Model instances, in this order: RNA types, MODOMICS entries,
        taxonomy, taxa, assembly versions, assemblies, annotation versions,
        annotations and detection methods.
    """
    # identifiers shared by several records below
    human, mouse, fly = 9606, 10090, 7227
    chordata, arthropoda = "a1b240af", "455a3823"
    current_assembly = "GcatSmFcytpU"
    current_annotation = "EyRBnPeVwbzW"
    ngs = "NGS 2nd generation"

    rna_types = [RNAType(id="WTS", name="whole transcriptome")]

    modomics = [
        Modomics(
            id=modomics_id,
            name=name,
            short_name=short_name,
            moiety="nucleoside",
            reference_id=reference_id,
        )
        for modomics_id, name, short_name, reference_id in (
            ("2000000006A", "N6-methyladenosine", "m6A", 96),
            ("2000000005C", "5-methylcytidine", "m5C", 18),
            ("2000000009U", "pseudouridine", "Y", 118),
        )
    ]

    taxonomy = [
        Taxonomy(id=taxonomy_id, domain="Eukarya", kingdom="Animalia", phylum=phylum)
        for taxonomy_id, phylum in (
            (chordata, "Chordata"),
            (arthropoda, "Arthropoda"),
        )
    ]

    taxa = [
        Taxa(id=taxa_id, name=name, short_name=short_name, taxonomy_id=taxonomy_id)
        for taxa_id, name, short_name, taxonomy_id in (
            (human, "Homo sapiens", "H. sapiens", chordata),
            (mouse, "Mus musculus", "M. musculus", chordata),
            (fly, "Drosophila melanogaster", "D. melanogaster", arthropoda),
        )
    ]

    assembly_versions = [AssemblyVersion(version_num=current_assembly)]

    assemblies = [
        Assembly(name=name, alt_name=alt_name, taxa_id=taxa_id, version=version)
        for name, alt_name, taxa_id, version in (
            ("GRCh38", "hg38", human, current_assembly),
            ("GRCm38", "mm10", mouse, current_assembly),
            # this version differs from the one registered in assembly_versions
            ("GRCh37", "hg19", human, "J9dit7Tfc6Sb"),
        )
    ]

    annotation_versions = [AnnotationVersion(version_num=current_annotation)]

    annotations = [
        Annotation(release=release, taxa_id=taxa_id, source="ensembl", version=version)
        for release, taxa_id, version in (
            (110, human, current_annotation),
            (110, mouse, current_annotation),
            # this version differs from the one registered in annotation_versions
            (109, human, "A8syx5TzWlK0"),
        )
    ]

    methods = [
        DetectionMethod(id=method_id, cls=ngs, meth=meth)
        for method_id, meth in (
            ("0ee048bc", "Chemical-assisted sequencing"),
            ("91b145ea", "Antibody-based sequencing"),
            ("01d26feb", "Enzyme/protein-assisted sequencing"),
        )
    ]

    return [
        *rna_types,
        *modomics,
        *taxonomy,
        *taxa,
        *assembly_versions,
        *assemblies,
        *annotation_versions,
        *annotations,
        *methods,
    ]


@pytest.fixture()
def project_template():
    """Return a project template (JSON format), dated 2023-08-25.

    All keys are required.
    "external_sources" can be None (null in yml).
    "external_sources" and "metadata" can be list of dict, or dict.

    This fixture takes no parameters: it always returns the same template,
    with "external_sources" as a list of two dicts and "metadata" as a list
    of four dicts. A new dict is built on every call.

    Returns
    -------
    dict
        Project template
    """

    def metadata_entry(modomics_id, tech, cto):
        # The entries only differ in modification, technology and cell
        # type/organ; the remaining values are the same for all of them.
        return {
            "rna": "mRNA",
            "modomics_id": modomics_id,
            "tech": tech,
            "method_id": "01d26feb",
            "organism": {"taxa_id": 9606, "cto": cto, "assembly": "GRCh38"},
        }

    return {
        "title": "Title",
        "summary": "Summary",
        "contact_name": "Contact Name",
        "contact_institution": "Contact Institution",
        "contact_email": "Contact Email",
        "date_published": "2024-01-01",
        "external_sources": [
            {"doi": "DOI1", "pmid": None},
            {"doi": "DOI2", "pmid": 22222222},
        ],
        "metadata": [
            metadata_entry("2000000006A", "Technology 1", "Cell Type 1"),
            metadata_entry("2000000006A", "Technology 1", "Cell Type 2"),
            metadata_entry("2000000005C", "Technology 2", "Organ 1"),
            metadata_entry("2000000005C", "Technology 1", "Cell Type 1"),
        ],
    }


# TODO this should be simplified to have only temp dirs fixture for session-wise
# usage, i.e. top of function, the rest should be handled in separate tests
# does it actually have to be session-wise???
# TODO this is now only used in integration/test_import_data
@pytest.fixture(scope="session")
def data_path(tmp_path_factory):
format = SPECS_EUF["format"]
Expand Down
114 changes: 47 additions & 67 deletions server/tests/fixtures/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,24 @@
import pytest

from scimodom.database.models import (
Organism,
Modomics,
Modification,
DetectionMethod,
DetectionTechnology,
Selection,
ProjectContact,
Project,
Dataset,
Data,
)
from scimodom.utils.common_dto import Strand


@pytest.fixture
def dataset(Session, setup): # noqa
def dataset(Session, selection, project): # noqa
stamp = datetime.now(timezone.utc).replace(microsecond=0)
organism = Organism(id=1, taxa_id=9606, cto="Cell Type 1")
modomics = Modomics(
id="m1", name="Mod1", short_name="Mod1", moiety="moiety", reference_id=1
)
modification = Modification(id=1, modomics_id=modomics.id, rna="Bla")
method = DetectionMethod(id="m1", cls="c1", meth="Method m1")
technology = DetectionTechnology(id=1, method_id=method.id, tech="Very Cool Tech")
selection = Selection(
id=1,
organism_id=organism.id,
modification_id=modification.id,
technology_id=technology.id,
)
contact = ProjectContact(
id=1,
contact_name="James Bond",
contact_institution="MI5",
contact_email="that@is.secret",
)
project = Project(
id="p1",
title="example project",
date_published=datetime.now(),
date_added=datetime.now(),
summary="summary",
contact_id=contact.id,
)
dataset = Dataset(
dataset1 = Dataset(
id="d1",
title="dataset title",
organism_id=organism.id,
technology_id=technology.id,
organism_id=1,
technology_id=1,
modification_type="RNA",
basecalling="bc1",
bioinformatics_workflow="wf1",
project_id=project.id,
project_id=project,
sequencing_platform="sp1",
experiment="experiment 1",
external_source="ext. source 1",
Expand All @@ -64,51 +29,66 @@ def dataset(Session, setup): # noqa
data1 = Data(
id=1,
dataset_id="d1",
modification_id=modification.id,
modification_id=1,
chrom="17",
start=100001,
end=120000,
name="Y",
end=100002,
name="m6A",
score=1000,
strand=Strand.FORWARD,
thick_start=100101,
thick_end=100201,
thick_start=100001,
thick_end=100002,
item_rgb="128,128,0",
coverage=43,
frequency=100,
)
data2 = Data(
id=2,
dataset_id="d1",
modification_id=modification.id,
modification_id=2,
chrom="Y",
start=200001,
end=220000,
name="X",
end=200002,
name="m5C",
score=900,
strand=Strand.REVERSE,
thick_start=200101,
thick_end=200201,
thick_start=200001,
thick_end=200002,
item_rgb="0,0,128",
coverage=44,
frequency=99,
)
session = Session()
session.add_all(setup)
session.add_all(
[
organism,
modomics,
modification,
method,
technology,
selection,
contact,
project,
dataset,
data1,
data2,
]
dataset2 = Dataset(
id="d2",
title="Dataset title 2",
organism_id=2,
technology_id=2,
modification_type="RNA",
basecalling="bc2",
bioinformatics_workflow="wf2",
project_id=project,
sequencing_platform="sp2",
experiment="experiment 2",
external_source="ext. source 2",
date_added=stamp,
)
data3 = Data(
id=3,
dataset_id="d2",
modification_id=1,
chrom="17",
start=100001,
end=100002,
name="m6A",
score=10,
strand=Strand.FORWARD,
thick_start=100001,
thick_end=100002,
item_rgb="0,0,0",
coverage=10,
frequency=10,
)
session = Session()
session.add_all([dataset1, dataset2, data1, data2, data3])
session.commit()
yield dataset
yield (dataset1, dataset2)
Loading

0 comments on commit 9f13aa6

Please sign in to comment.