Skip to content

Commit

Permalink
✨ start working on multiple phenotypes
Browse files Browse the repository at this point in the history
Adding multiple phenotypes to samples
  • Loading branch information
bunop committed Sep 11, 2023
1 parent 411310c commit 839b57d
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 0 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ TODO
* Manage python packages with `poetry <https://python-poetry.org/>`__
* Rename ``manifacturer`` into ``manufacturer``

0.4.8.post1
-----------

* Update *datasets* metadata

0.4.8 (2023-06-28)
------------------

Expand Down
6 changes: 6 additions & 0 deletions docs/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ documentation sections.
:prog: src/data/import_metadata.py
:nested: full

.. _import_multiple_phenotypes:

.. click:: src.data.import_multiple_phenotypes:main
:prog: src/data/import_multiple_phenotypes.py
:nested: full

.. _import_phenotypes:

.. click:: src.data.import_phenotypes:main
Expand Down
76 changes: 76 additions & 0 deletions src/data/import_multiple_phenotypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 11 12:13:47 2023
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
This program acts like import_phenotypes, but adds more data for the same
individual
"""

import click
import logging

from click_option_group import optgroup, RequiredMutuallyExclusiveOptionGroup
from pathlib import Path

from src.features.smarterdb import global_connection

logger = logging.getLogger(__name__)


# Command line entry point (work in progress): parses the options needed to
# attach several phenotype records to the same sample. Only the selection by
# '--id_column' is accepted for now; nothing is read or written yet.
@click.command()
@click.option(
    '--src_dataset', type=str, required=True,
    help="The raw dataset file name (zip archive) in which search datafile"
)
@click.option(
    '--dst_dataset', type=str, required=False,
    # the trailing space keeps the two adjacent literals from being glued
    # together in the rendered help text
    help=("The raw dataset file name (zip archive) in which add metadata "
          "(def. the 'src_dataset')")
)
@click.option('--datafile', type=str, required=True)
# NOTE(review): the value arrives as a string; pandas treats a str
# 'sheet_name' as a sheet label and an int as a zero-based position, so "0"
# presumably needs converting before it is handed to pandas - TODO confirm
# once the reading step is implemented
@click.option('--sheet_name',
              default="0",
              help="pandas 'sheet_name' option")
# exactly one of the options in this group has to be provided
@optgroup.group(
    'Add metadata relying on breeds or samples columns',
    cls=RequiredMutuallyExclusiveOptionGroup
)
@optgroup.option('--breed_column', type=str, help="The breed column")
@optgroup.option('--id_column', type=str, help="The original_id column")
@optgroup.option('--alias_column', type=str, help="An alias for original_id")
@click.option(
    '--column',
    required=True,
    multiple=True,
    help=(
        "Column to track. Could be specified multiple times")
)
@click.option('--na_values', type=str, help="pandas NA values")
def main(src_dataset, dst_dataset, datafile, sheet_name, breed_column,
         id_column, alias_column, column, na_values):
    """Read multiple data for the same sample from phenotype file and add it
    to SMARTER-database samples"""

    # The docstring above is what click prints as the command help, so the
    # developer notes live in comments instead.
    #
    # Parameters (all filled in by click from the command line):
    #   src_dataset, dst_dataset, datafile, sheet_name, na_values: collected
    #       but not used yet by this stub
    #   breed_column, id_column, alias_column: mutually exclusive selectors;
    #       only 'id_column' is currently supported
    #   column: tuple with every '--column' value given on the command line
    #
    # Raises:
    #   NotImplementedError: when '--breed_column' or '--alias_column' is
    #       used

    logger.info(f"{Path(__file__).name} started")

    # fail early on the selectors that have no implementation yet
    if breed_column or alias_column:
        raise NotImplementedError(
            "Loading multiple phenotypes by breed or alias is not yet "
            "implemented")

    logger.debug(f"Reading {column} columns")

    logger.info(f"{Path(__file__).name} ended")


if __name__ == '__main__':
    # set up logging for the whole process before the command starts
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO,
    )

    # the database connection has to be in place before running the command
    global_connection()

    main()
139 changes: 139 additions & 0 deletions tests/data/test_import_multiple_phenotypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 11 12:20:08 2023
@author: Paolo Cozzi <paolo.cozzi@ibba.cnr.it>
"""

import unittest
import pathlib
import tempfile

from openpyxl import Workbook
from click.testing import CliRunner
from unittest.mock import patch, PropertyMock

from src.data.import_multiple_phenotypes import (
main as import_multiple_phenotypes)
from src.features.smarterdb import Dataset, SampleSheep, Phenotype

from ..common import MongoMockMixin, SmarterIDMixin, SupportedChipMixin


class PhenotypeMixin(SmarterIDMixin, SupportedChipMixin, MongoMockMixin):
    """Shared fixtures for the multiple phenotypes import tests.

    At class level it provides a source and a destination dataset plus an
    in-memory workbook holding two phenotype records for the same sample
    id. For every test it provides a click ``CliRunner`` and one
    ``SampleSheep`` document.
    """

    @classmethod
    def setUpClass(cls):
        """Create the datasets and the workbook shared by all the tests."""
        super().setUpClass()

        # getting destination dataset
        # (presumably loaded by one of the parent mixins - TODO confirm)
        cls.dst_dataset = Dataset.objects.get(file="test.zip")

        # create a src dataset holding the phenotypes file
        cls.src_dataset = Dataset(
            file="test2.zip",
            country="Italy",
            species="Sheep",
            contents=[
                "phenotypes.xlsx"
            ]
        )
        cls.src_dataset.save()

        # create a workbook
        cls.workbook = Workbook()
        cls.sheet = cls.workbook.active

        # header in the first row, then one row per record: the same sample
        # id appears twice, since the point is to track multiple values for
        # a single individual
        header = ("Id", "daily_activity_min", "daily_distance_km")
        records = (
            ("test-1", 123, 11.5),
            ("test-1", 456, 23.0),
        )

        for row_idx, row in enumerate((header, *records), start=1):
            for col_idx, value in enumerate(row, start=1):
                cls.sheet.cell(row=row_idx, column=col_idx, value=value)

    @classmethod
    def tearDownClass(cls):
        """Delete every dataset and sample document."""
        Dataset.objects.delete()
        SampleSheep.objects.delete()

        super().tearDownClass()

    def setUp(self):
        """Create the CLI runner and the sample the phenotypes refer to."""
        self.runner = CliRunner()

        # need also a sample: its original_id matches the 'Id' column of the
        # workbook
        self.sample = SampleSheep(
            original_id="test-1",
            smarter_id="ITOA-TEX-000000001",
            country="Italy",
            breed="Texel",
            breed_code="TEX",
            dataset=self.dst_dataset,
            type_="background",
            chip_name=self.chip_name,
            alias="alias-1",
        )
        self.sample.save()

    def tearDown(self):
        """Delete the samples so that every test starts from a clean state."""
        SampleSheep.objects.delete()

        super().tearDown()


class TestImportPhenotypeCLI(PhenotypeMixin, unittest.TestCase):
    """Checks on the command line interface itself."""

    def test_help(self):
        # asking for help has to succeed and to print the click usage line
        cli_args = ["--help"]
        outcome = self.runner.invoke(import_multiple_phenotypes, cli_args)

        self.assertEqual(0, outcome.exit_code)
        self.assertIn('Usage: main', outcome.output)


class TestImportPhenotypeBySamples(PhenotypeMixin, unittest.TestCase):
    """Import phenotypes selecting samples through the id column."""

    @patch('src.features.smarterdb.Dataset.working_dir',
           new_callable=PropertyMock)
    def test_import_phenotype(self, my_working_dir):
        # command line: read from the source dataset, write on the
        # destination one, tracking both the phenotype columns
        cli_args = [
            "--src_dataset", "test2.zip",
            "--dst_dataset", "test.zip",
            "--datafile", "phenotypes.xlsx",
            "--id_column", "Id",
            "--column", "daily_activity_min",
            "--column", "daily_distance_km",
        ]

        # the temporary directory stands in for the dataset working directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            working_dir = pathlib.Path(tmpdirname)
            my_working_dir.return_value = working_dir

            # write the workbook where the mocked dataset will look for it
            self.workbook.save(f"{working_dir}/phenotypes.xlsx")

            # only the sample created in setUp is expected in the database
            self.assertEqual(SampleSheep.objects.count(), 1)

            result = self.runner.invoke(import_multiple_phenotypes, cli_args)

            self.assertEqual(0, result.exit_code, msg=result.exception)

            # refresh the sample from the database before inspecting it
            self.sample.reload()
            self.assertIsInstance(self.sample.phenotype, Phenotype)

            # both records of the workbook are expected, column by column
            reference = Phenotype(
                daily_activity_min=[123, 456],
                daily_distance_km=[11.5, 23.0]
            )

            self.assertEqual(reference, self.sample.phenotype)

0 comments on commit 839b57d

Please sign in to comment.