Skip to content

Commit

Permalink
Squashed 'pages/libs/mpxsonar/' changes from 6c083cd..91422cf
Browse files Browse the repository at this point in the history
91422cf fix merge
673fcc9 change == to is
59f49b0 Patch mpoxsonar (#2)
b403feb minor change in dbm
8892a93 Update MpoxSonar 0.5.3 Merge commit 'd20960430509ef4fc45455fdb206e7ef5c8030f5' into note/dev
58c001e fix matching function
668a8a9 qMerge commit '55314730f7bcd1e927c95bb9befc780fdb8ee3c6' into note/dev
ef06eca update NCBI download
09b494e Merge commit '70b50bcac89349125d5f077daa004f97079ac953' into note/dev
c5854d7 fix count issue
bf0fd53 Patch V.0.3.5 (#7)
a0572a0 fix delete function
7618df4 Update Dev branch (#5)
63ee752 Update patch 0.5.1 (#1)
541d76a update 0.5.1, fix bugs
4d9fd3b update 0.5.1
9f71aaf Merge branch 'note/dev' of https://github.com/rki-mf1/MpoxRadar into note/dev
7e9afad Merge commit 'f4e9902d0b5fb8806264882d42b61db87589d3de' as 'pages/libs/mpxsonar'
5568533 remove mpxsonar
245d27d update the site
dbdb006 update mpoxsonar repo.
9c147cb update NT display
570b4ff fix bug
55cc0b4 Update README.md
72e976f Merge pull request #12 from ferbsx/add-license-1
8b5a311 Create LICENSE.md
6c98df9 The first version to the mpoxradar.net (#11)
acb06dd The first version to the mpoxradar.net (#11)
b99fb44 terminology correction
9f4c00e replaced file for the correct name
f427576 correcting name
c14d9aa Update about.py
18333e7 FAQ edit according to alice comment
751c5f7 edit terminology: mpox, MPoxRadar
d015854 edit wrong links with anchor tag
bc7a09c Merge pull request #10 from ferbsx/injun/dev
268e798 Bugremovung
adbb052 Merge pull request #9 from ferbsx/injun/dev
e1cab9c Merge remote-tracking branch 'origin/dev' into injun/dev
5c989dc Merge remote-tracking branch 'origin/dev' into injun/dev
3cbcf5b Merge pull request #8 from ferbsx/ivan/dev
473f845 Merge branch 'injun/dev' into ivan/dev
a8b966f Merge branch 'injun/dev' into ivan/dev
3b32162 garbage cleaning
4cd7dd5 Update help.py
40d86d8 Update about.py
abec293 MPXdatabase dump added
892b0d2 Update util_help_tables.py
2e2d17d Update util_tool_checklists.py
f9159ca Update util_footer_table.py
08cbcd2 Update contact.py
518d27c 1. changed structure 2. !!!callbacks doent trigger
ee77cd4 csv-file in case of no working database - for developemnt only
3594609 data.py connects to a mpox-database and gets a dictionary of dataframes
94225ad stylized names, texts and titles
076fc28 this is Ivan's tool, which is ahead Injun's (--> trying to make one branck/fork from these two, to then put in dev)
7a177bd merge change
829b200 merge change
80833ec map.py is now unncessary
7efebb8 jorge_tool.py is now unnecessary
59e213c pointing at where to replace the map-figures
ca7d045 adding an example to run app.py with different env-paramenters - usefull when developing with accesss to local databases
c43399a fixed zoom in non-jorge's map
5b9ab7b added jorge's code
36d5ff6 updated personal notesvim
599909c updated personal notes
3de147b commented out the whole file to not interfere with pages/tool.py and avoid Werkzeug-Exception
98d2171 location_coordinates.csv file from Ivan
99980ab Data.csv file from Ivan
b36051c added import os in order to make pages/*.csv relative paths, note absolute paths
8fb0984 jorge's personal notes
b9e4854 copy of jorge's tool.py to be merged with dev's tool.py
024927a copy of jorge's data.py for dev-branch
e4bd4a0 rephrasing text. from jorge's branch to dev
364ecb3 general .env.template from ivan's to injun's
47d9421 added one of the two data-files from Ivan
80dd223 resize logo
e6e9b02 Note/dev (#6)
9e2ec74 Note/dev (#6)
4757c44 new design (from Injun)
12c8197 Merge branch 'dev' of https://github.com/ferbsx/MPXRadar-frontend into dev
aac9abb help page added
694caa8 edit Datasource Link
cb54534 add page for table, edit for callback
1f79d23 data for geo
71e1e94 make footer&tool.checkout, edit some pages
ee6e064 callbacks to the separate file
dfed955 added map animation
9026b9f data
f0a8727 Q&A on help.py, add photo, edit footer
9c4c7fa web page ceate and edit
0f50ada web page ceate and edit
dd44c8b edit from evan/dev
dbe41b2 edit from evan/dev
82a9053 map working instruments
96345f4 add style and map.py file
108070d playing with map
02da350 adding dbc components
3f5ef9d update mpxsonar 0.4.3, merge
ec3196b update mpxsonar 0.4.3, merge
5de8641 update mpxsonar 0.4.3, merge
5bace10 update mpxsonar 0.4.3, merge
2744ff3 Squashed 'pages/libs/mpxsonar/' changes from 36581b5..0ffa7eb
8d95d44 note/dev (#5)
159896e Note/dev -Update MPXsonar 0.4.2 (#4)
fd76d1d Note/dev -Update MPXsonar 0.4.2 (#4)
a9c407b map manipulatios
85542e7 pages/home.py
6bea1dd cosmeic changes
3871a65 cosmetic changes
2c3c307 fixed internal address problems
41e3bd3 added an example map
922bcf6 some details
fd9050e some details
e210173 some details
08761c1 new design details
3d00552 Delete .gitkeep
95ba64a Add files via upload
9c303b6 Create .gitkeep
13801c7 Add files via upload
b6fcb62 Merge commit 'f4be82d7efccd85009e4f538b5d52978ba74d4d2' as 'pages/libs/mpxsonar'
f4be82d Squashed 'pages/libs/mpxsonar/' content from commit 36581b5
4ef7779 Merge commit 'f4be82d7efccd85009e4f538b5d52978ba74d4d2' as 'pages/libs/mpxsonar'
3943203 remove subtree bugs
5623f35 Update to latest Note/dev (#3)
b5cca15 Update UI (#2)
be05693 add pages contact, edit app&about pages, add assets folder for imgae files
c82a67e making new repo for mpx ABOUT page
f56f892 making new repo for mpx ABOUT page
7dfdd5f update MPXSonar 0.3.7 (#1)
9112085 update MPXSonar 0.3.7 (#1)
40a838f update config.yml and *.md
db4ba15 setup *.md docs
6de6fe5 setup *.md docs
5a39726 Merge commit '555d93ff42af3d2468eb950ff0d2c8548b8b179e' 0.3.1 into note/dev
555d93f Squashed 'libs/mpxsonar/' changes from 50172ad..ae11fb4
7fc1420 add mpxsonar subtree
b0eddcc Squashed 'libs/mpxsonar/' content from commit 50172ad
511b011 Merge commit 'b0eddcc84014b880deddc0839a64bc4435b8237e' as 'libs/mpxsonar'
3c15519 Initial commit

git-subtree-dir: pages/libs/mpxsonar
git-subtree-split: 91422cfde7b7bdddc4465ec50e52fddd46793a7c
  • Loading branch information
silenus092 committed Sep 27, 2023
1 parent f4e9902 commit 89dc3b3
Show file tree
Hide file tree
Showing 16 changed files with 16,854 additions and 766 deletions.
13 changes: 4 additions & 9 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
# DASH APP
SERVER="0.0.0.0"
PORT="80"
SECRET_KEY="XX58602833XX"
DEBUG=True

# Database URL with generic format of connection.
# https://USERNAME:PASSWORD@IP:PORT/DBNAME
DB_URL="https://super_user:123456@localhost:3306/mpx"
Expand All @@ -12,11 +6,12 @@ DB_URL="https://super_user:123456@localhost:3306/mpx"
# DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL=DEBUG


# ------- For NCBI downloader -------
# NCBI API-KEY
# To get API key https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/
NCBI_API_KEY=""
NCBI_TOOL="MPXSonar"
NCBI_EMAIL=""

# For NCBI downloader
SAVE_PATH = ""
# Output from processing.
SAVE_PATH = "/data/prod/download"
34 changes: 17 additions & 17 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
# Welcome to the covsonar contributing guide <!-- omit in toc -->
# Welcome to the mpoxsonar contributing guide <!-- omit in toc -->

In this guide you will get an overview of the contribution workflow from setting up a development environment, testing your changes, submitting a pull request and performing a release.

Use the table of contents icon on the top left corner of this document to get to a specific section of this guide quickly.

## TLDR; I want to start hacking now!

Clone the repo, and the you can run these commands from within the `covsonar/` directory:
Clone the repo, and then you can run these commands from within the `mpoxsonar/` directory:

```sh
mamba create -n covsonar-dev python=3.9 poetry fortran-compiler nox pre-commit emboss=6.6.0
mamba activate covsonar-dev # needs to be activated for the following commands to work
mamba create -n mpoxsonar-dev python=3.10 poetry fortran-compiler nox pre-commit emboss=6.6.0
mamba activate mpoxsonar-dev # needs to be activated for the following commands to work
git config blame.ignoreRevsFile .git-blame-ignore-revs # ignore black reformatting when doing git blame
pre-commit install # install pre-commit hooks for formatting and linting
poetry install # install current source of covsonar and its dependencies
poetry run covsonar <args> # run covsonar
poetry install # install current source of mpoxsonar and its dependencies
poetry run sonar <args> # run sonar
nox # run linting and pytest tests (add -r to reuse previously built environments)
nox -rs zimports black # auto format imports and code
```
Expand All @@ -25,37 +25,37 @@ To get an overview of the project itself, read the [README](README.md).

## Getting started

covsonar is written in Python and tries to follow the excellent packaging guidelines ["Hypermodern Python" by Claudio Jolowicz](https://cjolowicz.github.io/posts/hypermodern-python-01-setup/). Nevertheless, there are some places where covsonar differs from those guidelines, and we have tried to outline those differences here wherever relevant. The main differences are caused by most work on covsonar happening in an environment where administrator access is not available (a shared Linux HPC), and also because we want our package to be installable via [conda](https://docs.conda.io/en/latest/index.html) or [mamba](https://github.com/mamba-org/mamba), from the [bioconda](https://bioconda.github.io/) channel in particular.
mpoxsonar is written in Python and tries to follow the excellent packaging guidelines ["Hypermodern Python" by Claudio Jolowicz](https://cjolowicz.github.io/posts/hypermodern-python-01-setup/). Nevertheless, there are some places where mpoxsonar differs from those guidelines, and we have tried to outline those differences here wherever relevant. The main differences are caused by most work on mpoxsonar happening in an environment where administrator access is not available (a shared Linux HPC), and also because we want our package to be installable via [conda](https://docs.conda.io/en/latest/index.html) or [mamba](https://github.com/mamba-org/mamba), from the [bioconda](https://bioconda.github.io/) channel in particular.

### Setting up your development tools

Some tooling needs to be set up before you can work on covsonar. To install this we use mamba, a faster replacement for the conda package manager, and place them in their own environment:
Some tooling needs to be set up before you can work on mpoxsonar. To install this we use mamba, a faster replacement for the conda package manager, and place them in their own environment:

```sh
mamba create -n covsonar-dev python=3 poetry fortran-compiler nox pre-commit
mamba create -n mpoxsonar-dev python=3 poetry fortran-compiler nox pre-commit
```

Then when you want to work on the project, or at the very least if you want to use poetry commands or run tests, you need to switch to this environment:

```sh
mamba activate covsonar-dev
mamba activate mpoxsonar-dev
```

The rest of this document assumes that you have the covsonar-dev environment active.
The rest of this document assumes that you have the mpoxsonar-dev environment active.

Once you have that environment installed and activated, you can run covsonar:
Once you have that environment installed and activated, you can run mpoxsonar:

```sh
poetry run covsonar --help
poetry run sonar --help
```

### Installing the package

As you're developing, you can install what you have developed using poetry install into your covsonar-dev conda environment:
As you're developing, you can install what you have developed using poetry install into your mpoxsonar-dev conda environment:

```sh
poetry install
covsonar --version
sonar --version
```

### Testing
Expand Down Expand Up @@ -154,13 +154,13 @@ $ poetry env info
Virtualenv
Python: 3.10.4
Implementation: CPython
Path: /home/<redacted>/.conda/envs/covsonar-dev
Path: /home/<redacted>/.conda/envs/mpoxsonar-dev
Valid: True
System
Platform: linux
OS: posix
Python: /home/<redacted>/.conda/envs/covsonar-dev
Python: /home/<redacted>/.conda/envs/mpoxsonar-dev
```

If you decide to rename your conda development environment or have multiple projects and decide to use multiple conda environments, then you might have to switch the environment that poetry is using. This can be done by running the commands:
Expand Down
110 changes: 85 additions & 25 deletions NCBI.downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,64 @@
import datetime
import logging
import os
import random
import sys
import time
import traceback
from urllib.error import HTTPError
from urllib.parse import urlparse

from Bio import Entrez
from Bio import SeqIO
import dateparser
from dotenv import load_dotenv
import mariadb

load_dotenv()
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG")
REF_LIST = [
"NC_063383.1",
"ON563414.3",
"MT903344.1",
IGNORE_LIST = [
"NC_063383.1", # REF
"ON563414.3", # REF
"MT903344.1", # REF
"KJ136820.1",
"FV537351.1",
"FV537352.1",
"OX044338.1",
"OX009124.1",
"NC_003310.1", # from 1996
"8HG1_T",
"8HG1_P",
]

Entrez.api_key = os.getenv("NCBI_API_KEY", "")
Entrez.tool = os.getenv("NCBI_TOOL", "")
Entrez.email = os.getenv("NCBI_EMAIL", "") # Always tell NCBI who you are
URI = urlparse(os.getenv("DB_URL", ""))
# connection parameters


def get_existing_sample_list():
    """Return the names of all samples already stored in the database.

    Connection settings come from the module-level ``URI``, which is parsed
    from the ``DB_URL`` environment variable.

    Returns:
        list: The ``name`` column of every row in the ``sample`` table.

    Raises:
        Any exception raised by the ``mariadb`` driver while connecting or
        querying is propagated to the caller; the cursor and connection are
        closed before it propagates.
    """
    conn_params = {
        "user": URI.username,
        "password": URI.password,
        "host": URI.hostname,
        "port": URI.port,
        # The URL path looks like "/<dbname>"; drop the slashes to get the name.
        "database": URI.path.replace("/", ""),
    }
    # Establish a connection.
    connection = mariadb.connect(**conn_params)
    try:
        cursor = connection.cursor()
        try:
            # Retrieve every known sample name.
            cursor.execute("SELECT name FROM sample;")
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        # Always free the connection, even when the query fails, so that
        # repeated calls after an error do not leak open connections.
        connection.close()


def download(save_path): # noqa: C901
Expand All @@ -56,7 +93,7 @@ def download(save_path): # noqa: C901

record = Entrez.read(handle)
total_count = record["Count"]
logging.info("Total sample to download: %s " % (total_count))
logging.info("All samples are found: %s " % (total_count))

handle = Entrez.esearch(
db=DB,
Expand All @@ -67,16 +104,23 @@ def download(save_path): # noqa: C901
)
record = Entrez.read(handle)
# setup cache
time.sleep(1)
time.sleep(random.randint(3, 6))
# print(record)
id_list = record["IdList"]
db_sample_list = get_existing_sample_list()
id_list = list(set(id_list) - set(db_sample_list))
total_count = len(id_list)

logging.info("Remaining samples after check: %s " % (total_count))
search_results = Entrez.read(Entrez.epost(DB, id=",".join(id_list)))
webenv = search_results["WebEnv"]
query_key = search_results["QueryKey"]
time.sleep(1)

success = True
except Exception as e:
logging.error("Error at %s", "getting ID", exc_info=e)
logging.info("Reties to reconnect...")
time.sleep(random.randint(10, 20))
handle.close()
if attempt == 3 and not success:
return False
Expand All @@ -85,6 +129,7 @@ def download(save_path): # noqa: C901
file_log_handler = open(os.path.join(save_path, ".download.log"), "w+")
else:
file_log_handler = open(os.path.join(save_path, ".download.log"), "r+")

try:
_start = int(file_log_handler.read())
logging.info(f"Resume previous download: start at {_start}")
Expand Down Expand Up @@ -120,15 +165,16 @@ def download(save_path): # noqa: C901
if 500 <= err.code <= 599:
logging.warning(f"Received error from server {err}")
logging.warning("Attempt {attempt} of 3")
time.sleep(5)
time.sleep(random.randint(30, 60))
if 400 == err.code:
logging.warning(f"Received error from server {err}")
logging.warning("Attempt {attempt} of 3")
time.sleep(5)
time.sleep(random.randint(30, 60))
else:
raise
except Exception as e:
logging.error("Error at %s", "download sample", exc_info=e)
time.sleep(random.randint(3, 6))

if attempt == 3 and not success:
fetch_handle.close()
Expand All @@ -145,7 +191,7 @@ def download(save_path): # noqa: C901
file_log_handler.seek(0)
file_log_handler.write(str(end))
file_log_handler.truncate()
time.sleep(2)
time.sleep(random.randint(3, 6))

with open(os.path.join(save_path, ".download.success"), "w") as f:
f.writelines("done")
Expand All @@ -161,7 +207,6 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901
if x.endswith(".GB"):
# Prints only text file present in My Folder
list_of_GB.append(os.path.join(save_download_path, x))
# TODO: remove reference genome from the list.

# fasta & meta
fasta_out_handler = open(os.path.join(save_final_path, "seq.fasta"), "w")
Expand All @@ -175,14 +220,17 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901
"RELEASE_DATE",
"COLLECTION_DATE",
"SEQ_TECH",
"HOST",
"GENOME_COMPLETENESS",
]
meta_out_handler.write("\t".join(header) + "\n") # Write the header line
try:
for _file in list_of_GB:
logging.info("Load:" + _file)
seq_GBrecords = list(SeqIO.parse(_file, "genbank"))
for seq_record in seq_GBrecords:
if seq_record.id in REF_LIST:
# remove reference genome from the list.
if seq_record.id in IGNORE_LIST:
continue

_isolate = ""
Expand All @@ -191,6 +239,8 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901
_NCBI_release_date = ""
_collection_date = ""
_seq_tech = ""
_nuc_completeness = ""
_host = ""
# print("Dealing with GenBank record %s" % seq_record.id)

fasta_out_handler.write(
Expand All @@ -199,6 +249,12 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901
)

# assume all keys are exit.
if "partial" in seq_record.description:
_nuc_completeness = "partial"
elif "complete" in seq_record.description:
_nuc_completeness = "complete"
if "host" in seq_record.features[0].qualifiers:
_host = seq_record.features[0].qualifiers["host"][0]
if "isolate" in seq_record.features[0].qualifiers:
_isolate = seq_record.features[0].qualifiers["isolate"][0]
if "country" in seq_record.features[0].qualifiers:
Expand All @@ -210,8 +266,10 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901
_collection_date = seq_record.features[0].qualifiers[
"collection_date"
][0]
# 1.) need to fix date Nov-2017 -> 2017-11-01, 09-Nov-2017 -> 2017-11-09
# 1995 -> 1995-01-01 set default value with first day of
# Step
# 1.) Fix date;
# * Nov-2017 -> 2017-11-01, 09-Nov-2017 -> 2017-11-09
# * 1995 -> 1995-01-01 set default value with first day of
# the month and first month of the year
# 2.) Year needs to be present in the format.

Expand All @@ -238,15 +296,15 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901

if "date" in seq_record.annotations:
_NCBI_release_date = seq_record.annotations["date"]
# need to fix date 18-NOV-2022 -> 2022-11-18
# Fix date; 18-NOV-2022 -> 2022-11-18
d = dateparser.parse(
_NCBI_release_date,
settings={"PREFER_DAY_OF_MONTH": "first", "DATE_ORDER": "YMD"},
)
_NCBI_release_date = d.strftime("%Y-%m-%d")

meta_out_handler.write(
"%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
"%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
% (
seq_record.id,
_isolate,
Expand All @@ -256,6 +314,8 @@ def generate_outputfiles(save_download_path, save_final_path): # noqa: C901
_NCBI_release_date,
_collection_date,
_seq_tech,
_host,
_nuc_completeness,
)
)
except Exception:
Expand Down Expand Up @@ -295,7 +355,7 @@ def run(args):
logging.StreamHandler(),
],
)
logging.info("Script version: 1")
logging.info("Script version: 1.1")
logging.info("Save output to:" + SAVE_PATH)

save_download_path = os.path.join(SAVE_PATH, "GB")
Expand All @@ -318,16 +378,16 @@ def run(args):

# 3
logging.info("--- Convert GeneBank to fasta and meta file ---")
if not os.path.exists(os.path.join(SAVE_PATH, ".success")):
if generate_outputfiles(save_download_path, save_final_path):
# if not os.path.exists(os.path.join(SAVE_PATH, ".success")):
if generate_outputfiles(save_download_path, save_final_path):

logging.info("Processing completed")
else:
logging.error("Process stop before it is finished")
sys.exit("Please rerun it again later.")
logging.info("Processing completed")
else:
logging.error("Process stop before it is finished")
sys.exit("Please rerun it again later.")

with open(os.path.join(SAVE_PATH, ".success"), "w+") as f:
f.write("done")
with open(os.path.join(SAVE_PATH, ".success"), "w+") as f:
f.write("done")
logging.info("--- Done ---")


Expand Down
Loading

0 comments on commit 89dc3b3

Please sign in to comment.