UPD start refactoring tests, ADD modification test skeleton

dieterich-lab · Jul 17, 2024 · 9f13aa6
1 parent 0afe3ef
commit 9f13aa6

Showing 9 changed files with 276 additions and 322 deletions.
diff --git a/server/tests/conftest.py b/server/tests/conftest.py
@@ -6,29 +6,17 @@
 from sqlalchemy.orm import sessionmaker
 
 from scimodom.database.database import init, Base
-from scimodom.database.models import (
-    RNAType,
-    Modomics,
-    Taxonomy,
-    Taxa,
-    Assembly,
-    AssemblyVersion,
-    Annotation,
-    AnnotationVersion,
-    DetectionMethod,
-)
 from scimodom.utils.specifications import SPECS_EUF
 
 # data path
 DataPath = namedtuple("DataPath", "LOC ASSEMBLY_PATH ANNOTATION_PATH META_PATH")
 
-
-@pytest.fixture()
-def EUF_specs():
-    # columns must match "ORM Data/Dataset model"
-    FMT = SPECS_EUF["format"]
-    VERSION = SPECS_EUF["versions"][-1]
-    return FMT, VERSION, SPECS_EUF[VERSION]
+pytest_plugins = [
+    "fixtures.setup",
+    "fixtures.selection",
+    "fixtures.project",
+    "fixtures.dataset",
+]
 
 
 @pytest.fixture()
@@ -44,176 +32,7 @@ def Session():
     session().close()
 
 
-@pytest.fixture()
-def setup():
-    add = []
-    rna_types = [RNAType(id="WTS", name="whole transcriptome")]
-    add.extend(rna_types)
-    modomics = [
-        Modomics(
-            id="2000000006A",
-            name="N6-methyladenosine",
-            short_name="m6A",
-            moiety="nucleoside",
-            reference_id=96,
-        ),
-        Modomics(
-            id="2000000005C",
-            name="5-methylcytidine",
-            short_name="m5C",
-            moiety="nucleoside",
-            reference_id=18,
-        ),
-        Modomics(
-            id="2000000009U",
-            name="pseudouridine",
-            short_name="Y",
-            moiety="nucleoside",
-            reference_id=118,
-        ),
-    ]
-    add.extend(modomics)
-    taxonomy = [
-        Taxonomy(
-            id="a1b240af", domain="Eukarya", kingdom="Animalia", phylum="Chordata"
-        ),
-        Taxonomy(
-            id="455a3823", domain="Eukarya", kingdom="Animalia", phylum="Arthropoda"
-        ),
-    ]
-    add.extend(taxonomy)
-    taxa = [
-        Taxa(
-            id=9606,
-            name="Homo sapiens",
-            short_name="H. sapiens",
-            taxonomy_id="a1b240af",
-        ),
-        Taxa(
-            id=10090,
-            name="Mus musculus",
-            short_name="M. musculus",
-            taxonomy_id="a1b240af",
-        ),
-        Taxa(
-            id=7227,
-            name="Drosophila melanogaster",
-            short_name="D. melanogaster",
-            taxonomy_id="455a3823",
-        ),
-    ]
-    add.extend(taxa)
-    assembly_version = [
-        AssemblyVersion(version_num="GcatSmFcytpU"),
-    ]
-    add.extend(assembly_version)
-    assembly = [
-        Assembly(name="GRCh38", alt_name="hg38", taxa_id=9606, version="GcatSmFcytpU"),
-        Assembly(name="GRCm38", alt_name="mm10", taxa_id=10090, version="GcatSmFcytpU"),
-        Assembly(name="GRCh37", alt_name="hg19", taxa_id=9606, version="J9dit7Tfc6Sb"),
-    ]
-    add.extend(assembly)
-    annotation_version = [
-        AnnotationVersion(version_num="EyRBnPeVwbzW"),
-    ]
-    add.extend(annotation_version)
-    annotation = [
-        Annotation(release=110, taxa_id=9606, source="ensembl", version="EyRBnPeVwbzW"),
-        Annotation(
-            release=110, taxa_id=10090, source="ensembl", version="EyRBnPeVwbzW"
-        ),
-        Annotation(release=109, taxa_id=9606, source="ensembl", version="A8syx5TzWlK0"),
-    ]
-    add.extend(annotation)
-    method = [
-        DetectionMethod(
-            id="0ee048bc", cls="NGS 2nd generation", meth="Chemical-assisted sequencing"
-        ),
-        DetectionMethod(
-            id="91b145ea", cls="NGS 2nd generation", meth="Antibody-based sequencing"
-        ),
-        DetectionMethod(
-            id="01d26feb",
-            cls="NGS 2nd generation",
-            meth="Enzyme/protein-assisted sequencing",
-        ),
-    ]
-    add.extend(method)
-    return add
-
-
-@pytest.fixture()
-def project_template():
-    """\
-    2023-08-25 Project template (JSON format).
-
-    All keys are required.
-    "external_sources" can be None (null in yml).
-    "external_sources" and "metadata" can be list of dict, or dict.
-
-    Parameters
-    ----------
-    external_sources_fmt: str or None
-        "external_sources" format (list, dict, or None)
-    metadata_fmt: str
-        "metadata" format (list or dict)
-    missing_key: str or None
-        missing_key
-
-    Returns
-    -------
-    dict
-        Project template
-    """
-
-    project = dict()
-    project["title"] = "Title"
-    project["summary"] = "Summary"
-    project["contact_name"] = "Contact Name"
-    project["contact_institution"] = "Contact Institution"
-    project["contact_email"] = "Contact Email"
-    project["date_published"] = "2024-01-01"
-    project["external_sources"] = [
-        {"doi": "DOI1", "pmid": None},
-        {"doi": "DOI2", "pmid": 22222222},
-    ]
-    project["metadata"] = [
-        {
-            "rna": "mRNA",
-            "modomics_id": "2000000006A",
-            "tech": "Technology 1",
-            "method_id": "01d26feb",
-            "organism": {"taxa_id": 9606, "cto": "Cell Type 1", "assembly": "GRCh38"},
-        },
-        {
-            "rna": "mRNA",
-            "modomics_id": "2000000006A",
-            "tech": "Technology 1",
-            "method_id": "01d26feb",
-            "organism": {"taxa_id": 9606, "cto": "Cell Type 2", "assembly": "GRCh38"},
-        },
-        {
-            "rna": "mRNA",
-            "modomics_id": "2000000005C",
-            "tech": "Technology 2",
-            "method_id": "01d26feb",
-            "organism": {"taxa_id": 9606, "cto": "Organ 1", "assembly": "GRCh38"},
-        },
-        {
-            "rna": "mRNA",
-            "modomics_id": "2000000005C",
-            "tech": "Technology 1",
-            "method_id": "01d26feb",
-            "organism": {"taxa_id": 9606, "cto": "Cell Type 1", "assembly": "GRCh38"},
-        },
-    ]
-
-    return project
-
-
-# TODO this should be simplified to have only temp dirs fixture for session-wise
-# usage, i.e. top of function, the rest should be handlded in separate tests
-# does it actually have to be session-wise???
+# TODO this is now only used in integration/test_import_data
 @pytest.fixture(scope="session")
 def data_path(tmp_path_factory):
     format = SPECS_EUF["format"]

diff --git a/server/tests/fixtures/dataset.py b/server/tests/fixtures/dataset.py
@@ -3,59 +3,24 @@
 import pytest
 
 from scimodom.database.models import (
-    Organism,
-    Modomics,
-    Modification,
-    DetectionMethod,
-    DetectionTechnology,
-    Selection,
-    ProjectContact,
-    Project,
     Dataset,
     Data,
 )
 from scimodom.utils.common_dto import Strand
 
 
 @pytest.fixture
-def dataset(Session, setup):  # noqa
+def dataset(Session, selection, project):  # noqa
     stamp = datetime.now(timezone.utc).replace(microsecond=0)
-    organism = Organism(id=1, taxa_id=9606, cto="Cell Type 1")
-    modomics = Modomics(
-        id="m1", name="Mod1", short_name="Mod1", moiety="moiety", reference_id=1
-    )
-    modification = Modification(id=1, modomics_id=modomics.id, rna="Bla")
-    method = DetectionMethod(id="m1", cls="c1", meth="Method m1")
-    technology = DetectionTechnology(id=1, method_id=method.id, tech="Very Cool Tech")
-    selection = Selection(
-        id=1,
-        organism_id=organism.id,
-        modification_id=modification.id,
-        technology_id=technology.id,
-    )
-    contact = ProjectContact(
-        id=1,
-        contact_name="James Bond",
-        contact_institution="MI5",
-        contact_email="that@is.secret",
-    )
-    project = Project(
-        id="p1",
-        title="example project",
-        date_published=datetime.now(),
-        date_added=datetime.now(),
-        summary="summary",
-        contact_id=contact.id,
-    )
-    dataset = Dataset(
+    dataset1 = Dataset(
         id="d1",
         title="dataset title",
-        organism_id=organism.id,
-        technology_id=technology.id,
+        organism_id=1,
+        technology_id=1,
         modification_type="RNA",
         basecalling="bc1",
         bioinformatics_workflow="wf1",
-        project_id=project.id,
+        project_id=project,
         sequencing_platform="sp1",
         experiment="experiment 1",
         external_source="ext. source 1",
@@ -64,51 +29,66 @@ def dataset(Session, setup):  # noqa
     data1 = Data(
         id=1,
         dataset_id="d1",
-        modification_id=modification.id,
+        modification_id=1,
         chrom="17",
         start=100001,
-        end=120000,
-        name="Y",
+        end=100002,
+        name="m6A",
         score=1000,
         strand=Strand.FORWARD,
-        thick_start=100101,
-        thick_end=100201,
+        thick_start=100001,
+        thick_end=100002,
         item_rgb="128,128,0",
         coverage=43,
         frequency=100,
     )
     data2 = Data(
         id=2,
         dataset_id="d1",
-        modification_id=modification.id,
+        modification_id=2,
         chrom="Y",
         start=200001,
-        end=220000,
-        name="X",
+        end=200002,
+        name="m5C",
         score=900,
         strand=Strand.REVERSE,
-        thick_start=200101,
-        thick_end=200201,
+        thick_start=200001,
+        thick_end=200002,
         item_rgb="0,0,128",
         coverage=44,
         frequency=99,
     )
-    session = Session()
-    session.add_all(setup)
-    session.add_all(
-        [
-            organism,
-            modomics,
-            modification,
-            method,
-            technology,
-            selection,
-            contact,
-            project,
-            dataset,
-            data1,
-            data2,
-        ]
+    dataset2 = Dataset(
+        id="d2",
+        title="Dataset title 2",
+        organism_id=2,
+        technology_id=2,
+        modification_type="RNA",
+        basecalling="bc2",
+        bioinformatics_workflow="wf2",
+        project_id=project,
+        sequencing_platform="sp2",
+        experiment="experiment 2",
+        external_source="ext. source 2",
+        date_added=stamp,
     )
+    data3 = Data(
+        id=3,
+        dataset_id="d2",
+        modification_id=1,
+        chrom="17",
+        start=100001,
+        end=100002,
+        name="m6A",
+        score=10,
+        strand=Strand.FORWARD,
+        thick_start=100001,
+        thick_end=100002,
+        item_rgb="0,0,0",
+        coverage=10,
+        frequency=10,
+    )
+    session = Session()
+    session.add_all([dataset1, dataset2, data1, data2, data3])
     session.commit()
-    yield dataset
+    yield (dataset1, dataset2)