Skip to content

Commit

Permalink
Merge pull request #317 from debbiemarkslab/pdb_and_setup_fixes
Browse files Browse the repository at this point in the history
Pdb and setup fixes
  • Loading branch information
thomashopf authored Nov 5, 2024
2 parents 7568749 + f272939 commit fd0572e
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 153 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ jobs:
conda info -a
conda create -q -n test-environment python=${{ matrix.python-version }} numpy scipy numba pandas matplotlib
source activate test-environment
- name: Run setup.py
- name: Install Hatch
uses: pypa/hatch@install
- name: Build and install package
run: |
pip install build
python setup.py sdist --formats=zip -k
python -m build
find ./dist -iname "*.zip" -print0 | xargs -0 pip install
hatch build
find ./dist -iname "*.tar.gz" -print0 | xargs -0 pip install
pip install codecov
- name: Download test files
run: |
Expand Down
18 changes: 7 additions & 11 deletions .github/workflows/build_test_and_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ jobs:
conda info -a
conda create -q -n test-environment python=${{ matrix.python-version }} numpy scipy numba pandas matplotlib
source activate test-environment
- name: Run setup.py
- name: Install Hatch
uses: pypa/hatch@install
- name: Build and install package
run: |
python setup.py sdist --formats=zip -k
find ./dist -iname "*.zip" -print0 | xargs -0 pip install
hatch build
find ./dist -iname "*.tar.gz" -print0 | xargs -0 pip install
pip install codecov
- name: Download test files
run: |
Expand All @@ -42,14 +44,8 @@ jobs:
with:
run: coverage run -m unittest discover -s test -p "Test*.py"
working-directory: ./ #optional
- name: Publish evcouplings to test PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.PYPI_ACCESS_TOKEN_TEST }}
repository_url: https://test.pypi.org/legacy/
- name: Publish evcouplings to PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
uses: pypa/gh-action-pypi-publish@v1.9.0
with:
user: __token__
password: ${{ secrets.PYPI_ACCESS_TOKEN }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
*.ipynb_checkpoints*
notebooks_dev/*
evcouplings.egg-info/*
/dist/
3 changes: 0 additions & 3 deletions MANIFEST.in

This file was deleted.

38 changes: 26 additions & 12 deletions evcouplings/compare/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,9 @@ def __init__(self, filehandle, keep_full_data=False):
"_atom_site.pdbx_formal_charge": "charge",
}

HELIX_TARGET_COLS = {
# full list of conf types: https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Items/_struct_conf_type.id.html;
# mapping between file types: https://manpages.debian.org/unstable/dssp/mkdssp.1.en.html
CONF_TARGET_COLS = {
"_struct_conf.conf_type_id": "conformation_type",
"_struct_conf.id": "id",
# label_asym_id and label_seq_id are sufficient for merging to atom table;
Expand Down Expand Up @@ -508,11 +510,15 @@ def __init__(self, filehandle, keep_full_data=False):
# decode information into dataframe with BioPython helper method; note the _struct_conf category may not be
# present if no helices (or other conformation elements) are annotated in the structure
try:
self.helix_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in HELIX_TARGET_COLS.items()
})
self.conf_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in CONF_TARGET_COLS.items()
}).query(
# there are a handful of PDB entries that have (probably wrong) secondary structure assignments
# extending over more than one segment, i.e. begin and end label_asym_id differ (e.g. 2bp7, 2wjv);
# drop these rather than raising an error
"beg_label_asym_id == end_label_asym_id"
)
except KeyError:
self.helix_table = None
self.conf_table = None

# decode information into dataframe with BioPython helper method; note this section may not be
# present if no sheets exist in the structure
Expand All @@ -526,16 +532,23 @@ def __init__(self, filehandle, keep_full_data=False):
# create secondary structure table for merging to chain tables
# (will only contain helix/H and strand/E, coil/C will need to be filled in)
sse_raw = []
for sse_type, sse_table in [
("H", self.helix_table),
("E", self.sheet_table)
for sse_type, sse_table, sse_filter in [
("H", self.conf_table, "HELX"),
("E", self.sheet_table, None),
# also retrieve beta strands/bridges from conf_table if available
("E", self.conf_table, "STRN"),
]:
# skip if secondary structure element not present in PDB file at all
if sse_table is None:
continue

# filter table down to relevant entries for current secondary structure type
if sse_filter is not None:
sse_table = sse_table.query(
f"conformation_type.str.startswith('{sse_filter}')"
)

for _, row in sse_table.iterrows():
assert row.beg_label_asym_id == row.end_label_asym_id
for seq_id in range(row.beg_label_seq_id, row.end_label_seq_id + 1):
sse_raw.append({
"label_asym_id": row.beg_label_asym_id,
Expand Down Expand Up @@ -694,7 +707,7 @@ def get_chain(self, chain, model=0, is_author_id=True):
# create coordinate ID from author residue ID + insertion code
# (this should be unique and circumvents issues from 0 seqres values if selecting based on author chain ID)
coord_id=lambda df: df.auth_seq_id.astype(str) + df.insertion_code,
seqres_id=lambda df: df.label_seq_id.astype(str).replace("0", np.nan),
seqres_id=lambda df: df.label_seq_id.astype(str).replace("0", pd.NA).replace("", pd.NA),
one_letter_code=lambda df: df.label_comp_id.map(AA3_to_AA1, na_action="ignore"),
# note that MSE will now be labeled as HETATM, which was not the case with MMTF
hetatm=lambda df: df.record_type == "HETATM",
Expand All @@ -720,12 +733,13 @@ def get_chain(self, chain, model=0, is_author_id=True):
how="left"
)
else:
# initialize to pd.NA instead of np.nan; otherwise a warning about assigning str to a float64 column appears
res_sse = res.assign(
sec_struct_3state=np.nan
sec_struct_3state=pd.NA
)

res_sse.loc[
res_sse.sec_struct_3state.isnull() & (res_sse.label_seq_id > 0),
res_sse.sec_struct_3state.isnull() & res_sse.seqres_id.notnull(),
"sec_struct_3state"
] = "C"

Expand Down
64 changes: 64 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Packaging configuration for the evcouplings distribution, built with the
# hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "evcouplings"
# NOTE(review): the version is declared statically here, while [tool.hatch.version]
# below also points at evcouplings/__init__.py; presumably only one of the two is
# meant to be the source of truth -- confirm (a Hatch-managed version would need
# `dynamic = ["version"]` in place of this static field).
version = "0.2.1"
description = "A Framework for evolutionary couplings analysis"
readme = "README.md"
license = "MIT"
authors = [
{ name = "Thomas Hopf", email = "thomas.hopf@gmail.com" },
]
keywords = [
"analysis",
"couplings",
"evolutionary",
]
# NOTE(review): no `requires-python` field is declared; the classifiers below
# list Python 3.10 and 3.11 only -- confirm the supported interpreter range.
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
# Runtime dependencies; only biopython, ruamel.yaml and setuptools carry
# version constraints, everything else is unpinned.
dependencies = [
"billiard",
"biopython>=1.84",
"bokeh",
"click",
"filelock",
"jinja2",
"matplotlib",
"msgpack",
"numba",
"numpy",
"pandas",
"psutil",
"requests",
# upper bound on ruamel.yaml; reason not stated here -- TODO document why <0.18
"ruamel.yaml<0.18",
"scikit-learn",
"scipy",
"seaborn",
"setuptools>=18.2",
]

# Command-line entry points: each command name maps to a `module:object` target.
[project.scripts]
evcouplings = "evcouplings.utils.app:app"
evcouplings_dbupdate = "evcouplings.utils.update_database:app"
evcouplings_runcfg = "evcouplings.utils.pipeline:app"
evcouplings_summarize = "evcouplings.utils.summarize:app"

[project.urls]
Homepage = "https://github.com/debbiemarkslab/EVcouplings"

# Version source file for Hatch (see the review note on `project.version` above).
[tool.hatch.version]
path = "evcouplings/__init__.py"

# File selection for the source distribution: include the evcouplings package directory.
[tool.hatch.build.targets.sdist]
include = [
"/evcouplings",
]
17 changes: 0 additions & 17 deletions requirements.txt

This file was deleted.

105 changes: 0 additions & 105 deletions setup.py

This file was deleted.

0 comments on commit fd0572e

Please sign in to comment.