Merge pull request #8 from DeepRank/update_read_write

Update read and write
DeepRank · Oct 23, 2019 · 06a67a3 · 06a67a3
2 parents 1688039 + 87ba2b1
commit 06a67a3
Show file tree

Hide file tree

Showing 13 changed files with 747 additions and 306 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,7 @@ Byte-compiled / optimized
 *.izone
 *.lzone
 *.ref_pairs
+.vscode/settings.json
+.vscode/launch.json
+.vscode/.ropeproject/config.py
+.vscode/.ropeproject/objectdb
diff --git a/pdb2sql/StructureSimilarity.py b/pdb2sql/StructureSimilarity.py
diff --git a/pdb2sql/__init__.py b/pdb2sql/__init__.py
@@ -1,3 +1,4 @@
-#from pdb2sql import pdb2sql
-#from .interface import *
-#import transform
+from .pdb2sqlcore import pdb2sql
+from .interface import interface
+from .StructureSimilarity import StructureSimilarity
+from . import transform
diff --git a/pdb2sql/interface.py b/pdb2sql/interface.py
@@ -11,8 +11,7 @@ class interface(pdb2sql):
     def __init__(self, pdb):
         '''Identify interface between protein chains.'''
 
-        pdb2sql.__init__(self, pdb, no_extra=True)
-        self._create_sql()
+        pdb2sql.__init__(self, pdb)
         self.backbone_type = ['CA', 'C', 'N', 'O']
 
     ##########################################################################

diff --git a/pdb2sql/pdb2sqlAlchemy.py b/pdb2sql/pdb2sqlAlchemy.py
@@ -18,17 +18,18 @@ class ATOM(Base):
     __tablename__ = 'ATOM'
     rowID = Column(Integer, primary_key=True)
     serial = Column(Integer, nullable=False)
-    name = Column(String(5), nullable=False)
-    altLoc = Column(String(5), nullable=False)
-    resName = Column(String(5), nullable=False)
-    chainID = Column(String(5), nullable=False)
+    name = Column(String(6), nullable=False)
+    altLoc = Column(String(1), nullable=False)
+    resName = Column(String(3), nullable=False)
+    chainID = Column(String(1), nullable=False)
     resSeq = Column(Integer, nullable=False)
-    iCode = Column(String(5), nullable=False)
+    iCode = Column(String(1), nullable=False)
     x = Column(Float, nullable=False)
     y = Column(Float, nullable=False)
     z = Column(Float, nullable=False)
     occ = Column(Float, nullable=False)
     temp = Column(Float, nullable=False)
+    element = Column(String(2), nullable=False)
     model = Column(Integer, nullable=False)
 
 
@@ -39,10 +40,9 @@ def __init__(
             pdbfile,
             sqlfile=None,
             fix_chainID=False,
-            verbose=False,
-            no_extra=True):
+            verbose=False):
         '''Use sqlAlchemy to load the database.'''
-        super().__init__(pdbfile, sqlfile, fix_chainID, verbose, no_extra)
+        super().__init__(pdbfile, sqlfile, fix_chainID, verbose)
         self._create_sql()
 
     def _create_sql(self):
@@ -92,14 +92,15 @@ def _create_sql(self):
                 if colname in del_copy.keys():
                     data = line[del_copy[colname][0]:del_copy[colname][1]].strip()
 
-                # convert it if necessary
-                if coltype == 'INT':
-                    data = int(data)
-                elif coltype == 'REAL':
-                    data = float(data)
+                    # convert it if necessary
+                    if coltype == 'INT':
+                        data = int(data)
+                    elif coltype == 'REAL':
+                        data = float(data)
 
-                # append to dict
-                at[colname] = data
+
+                    # append to dict
+                    at[colname] = data
 
             # create a new ATOM
             newat = ATOM(
@@ -115,6 +116,7 @@ def _create_sql(self):
                 z=at['z'],
                 occ=at['occ'],
                 temp=at['temp'],
+                element=at['element'],
                 model=self.nModel)
 
             # add the atom to the data base
@@ -264,7 +266,7 @@ def update(self, attribute, values, **kwargs):
                     'Wrong number of values for the ATOM selection')
 
             # goes through all the ros
-            for irow in range(nvalues):
+            for irow in range(nrow):
 
                 # create  a dict of values
                 dict_values = {}

diff --git a/pdb2sql/pdb2sql_base.py b/pdb2sql/pdb2sql_base.py
@@ -1,6 +1,7 @@
 import sqlite3
 import subprocess as sp
 import os
+import warnings
 import numpy as np
 from time import time
 
@@ -12,23 +13,23 @@ def __init__(
             pdbfile,
             sqlfile=None,
             fix_chainID=False,
-            verbose=False,
-            no_extra=True):
+            verbose=False):
         '''Base class for the definition of sql database.
 
         Args:
-            pdbbfile : name of the pdbfile
-            sqlfile : name of the sql file (if None the db is stored in memeory)
-            fix_chainID : bool to rename chain ID from A, B, C, ....
-            verbose : bool verbose
-            no_extra : bool don't consider the 'temp' and 'model' column
+            pdbfile (str, list(str/bytes), ndarray) : name of pdbfile or
+                list or ndarray containing the pdb data
+            sqlfile (str, optional): name of the sqlfile.
+                By default it is created in memory only.
+            fix_chainID (bool, optional): check if the names of the chains
+                are A, B, C, ... and fix them if not.
+            verbose (bool): probably print stuff
         '''
 
         self.pdbfile = pdbfile
         self.sqlfile = sqlfile
         self.is_valid = True
         self.verbose = verbose
-        self.no_extra = no_extra
 
         # column names and types
         self.col = {'serial': 'INT',
@@ -43,6 +44,7 @@ def __init__(
                     'z': 'REAL',
                     'occ': 'REAL',
                     'temp': 'REAL',
+                    'element': 'TEXT',
                     'model': 'INT'}
 
         # delimtier of the column format
@@ -55,12 +57,13 @@ def __init__(
             'resName': [17, 20],
             'chainID': [21, 22],
             'resSeq': [22, 26],
-            'iCode': [26, 26],
+            'iCode': [26, 27],
             'x': [30, 38],
             'y': [38, 46],
             'z': [46, 54],
             'occ': [54, 60],
-            'temp': [60, 66]}
+            'temp': [60, 66],
+            'element': [76,78]}
 
     ##########################################################################
     #
@@ -112,41 +115,109 @@ def add_column(self, colname, coltype='FLOAT', default=0):
     def exportpdb(self, fname, append=False, periodic=False, **kwargs):
         '''Export a PDB file with kwargs selection.'''
 
-        # get the data
-        data = self.get('*', **kwargs)
-
-        # write each line
-        # the PDB format is pretty strict
-        # http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
         if append:
             f = open(fname, 'a')
         else:
             f = open(fname, 'w')
 
+        lines = self.sql2pdb(**kwargs)
+        for i in lines:
+            f.write(i + '\n')
+
+        f.close()
+
+    def sql2pdb(self, **kwargs):
+        """Convert sql pdb data to PDB formatted lines
+
+        Returns:
+            list: pdb-format lines
+        """
+        data = self.get('*', **kwargs)
+        pdb = []
+        # the PDB format is pretty strict
+        # http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
         for d in data:
             line = 'ATOM  '
             line += '{:>5}'.format(d[0])    # serial
             line += ' '
-            line += '{:^4}'.format(d[1])    # name
+            line += self._format_atomname(d) # name
             line += '{:>1}'.format(d[2])    # altLoc
-            line += '{:>3}'.format(d[3])  # resname
+            line += '{:>3}'.format(d[3])    # resname
             line += ' '
             line += '{:>1}'.format(d[4])    # chainID
             line += '{:>4}'.format(d[5])    # resSeq
             line += '{:>1}'.format(d[6])    # iCODE
             line += '   '
-            line += '{: 8.3f}'.format(d[7])  # x
-            line += '{: 8.3f}'.format(d[8])  # y
-            line += '{: 8.3f}'.format(d[9])  # z
-            if not self.no_extra:
-                line += '{: 6.2f}'.format(d[10])    # occ
-                line += '{: 6.2f}'.format(d[11])    # temp
-            line += '\n'
+            line += pdb2sql_base._format_xyz(d[7]) # x
+            line += pdb2sql_base._format_xyz(d[8]) # y
+            line += pdb2sql_base._format_xyz(d[9])  # z
+            line += '{:>6.2f}'.format(d[10])    # occ
+            line += '{:>6.2f}'.format(d[11])    # temp
+            line += ' ' * 10
+            line += '{:>2}'.format(d[12])       # element
+            # line += '\n'
+            pdb.append(line)
+
+        return pdb
+
+    def _format_atomname(self, data):
+        """Format atom name to align with PDB requirements:
+             - a one-letter atom name starts at column 14,
+             - while a two-letter atom name such as FE starts at column 13.
 
-            f.write(line)
+        Args:
+            data(list): sql output for one pdb line
+
+        Returns:
+            str: formatted atom name
+        """
+        name = data[1]
+        lname = len(name)
+        if lname in (1, 4):
+            name = '{:^4}'.format(name)
+        elif lname == 2:
+            if name == data[12]:  # name == element
+                name = '{:<4}'.format(name)
+            else:
+                name = '{:^4}'.format(name)
+        else:
+            if name[0] in '0123456789':
+                name = '{:<4}'.format(name)
+            else:
+                name = '{:>4}'.format(name)
+        return name
 
-        # close
-        f.close()
+    @staticmethod
+    def _format_xyz(i):
+        """Format a PDB x, y or z coordinate value.
+
+        Note: PDB has a fixed 8-column space for x,y or z value.
+            Thus the value should be in the range of (-1e7, 1e8).
+
+        Args:
+            i(float): PDB x, y or z coordinate.
+
+        Raises:
+            ValueError: if the value falls outside the range (-1e7, 1e8) after rounding.
+
+        Returns:
+            str: formatted x, y or z value.
+        """
+
+        if i >= 1e8 - 0.5 or i <= -1e7 + 0.5:
+            raise ValueError(
+                f'PDB coordination {i} exceeds the range of (-1e7, 1e8) '
+                f'after rounding.')
+        elif i >= 1e6 - 0.5 or i <= -1e5 + 0.5:
+            i = '{:>8.0f}'.format(i)
+        elif i >= 1e5 - 0.5 or i <= -1e4 + 0.5:
+            i = '{:>8.1f}'.format(i)
+        elif i >= 1e4 - 0.5 or i <= -1e3 + 0.5:
+            i = '{:>8.2f}'.format(i)
+        else:
+            i = '{:>8.3f}'.format(i)
+
+        return i
 
     def close(self, rmdb=True):