Skip to content

Commit

Permalink
Use HTTPS to download protein data bank files
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentRDC committed Jun 19, 2022
1 parent 2d3a570 commit 4d8d01c
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Release 1.5.0

* Added some typing information.
* Added the :attr:`Supercell.scaled_lattice_vectors` property and associated documentation (#11).
* Protein Data Bank downloads are now done through HTTPS rather than FTP, as recommended by the RCSB Protein Data Bank.
* Fixed some documentation formatting.

Release 1.4.1
Expand Down
29 changes: 8 additions & 21 deletions crystals/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
"""
Atomic structure parsers.
"""
import gzip
import re
from typing import Any, Iterable, Optional, Tuple, Union
import warnings
from abc import abstractmethod
from contextlib import AbstractContextManager, suppress
from functools import lru_cache
from itertools import repeat
from os import PathLike, environ, remove
from os import PathLike, environ
from pathlib import Path
from platform import system
from string import digits, punctuation
Expand Down Expand Up @@ -147,7 +146,7 @@ def __exit__(self, *args, **kwargs):
def download_pdb_file(
pdb_code: str,
download_dir: Optional[PathLike] = None,
server: str = "ftp://ftp.wwpdb.org",
server: str = "https://files.rcsb.org",
overwrite: bool = False,
) -> Path:
"""
Expand All @@ -161,7 +160,7 @@ def download_pdb_file(
download_dir : path-like object
Directory where to save the PDB file. Default is a local folder in the current directory
server : str, optional
Address of the FTP server from which to download the PDB file. Default is the main server.
Root address of the server from which to download the PDB file. Default is the main server.
overwrite : bool, optional
If True, existing PDB file with the same structure will be overwritten. Default is False.
Expand All @@ -170,36 +169,24 @@ def download_pdb_file(
file : pathlib.Path
Pointer to the downloaded file
"""
# Get the compressed PDB structure
code = pdb_code.lower()
archive_fn = Path(f"pdb{code}.ent.gz")
pdb_dir = "divided"
url = (
server + f"/pub/pdb/data/structures/{pdb_dir}/pdb/{code[1:3]}/{archive_fn}"
)
# Where does the final PDB file get saved?
if download_dir is None:
path = STRUCTURE_CACHE
else:
path = Path(download_dir)

path.mkdir(exist_ok=True)

filename = path / archive_fn
final_file = path / f"pdb{code}.ent" # (decompressed)
final_file = path / f"pdb{pdb_code.lower()}.ent" # (decompressed)

# Skip download if the file already exists
if (not overwrite) and (final_file.exists()):
return final_file

urlretrieve(url, filename)
resp = requests.get(server + f"/download/{pdb_code.upper()}.pdb")
resp.raise_for_status()

# Uncompress the archive, delete when done
# Can't use context manager with gzip.open until Python 2.7
with gzip.open(filename, "rb") as gz:
with open(final_file, "wb") as out:
out.writelines(gz)
remove(filename)
with open(final_file, "wb") as out:
out.write(resp.content)

return Path(final_file)

Expand Down

0 comments on commit 4d8d01c

Please sign in to comment.