Merge pull request #57 from N3PDF/drop_lhapdf_dependency

Remove LHAPDF non-python dependencies
N3PDF · Sep 10, 2021 · d8499f9 · d8499f9
2 parents 415a8be + 269189c
commit d8499f9
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 116 deletions.
diff --git a/README.md b/README.md
@@ -38,6 +38,16 @@ TensorFlow is updated frequently and a later version of TensorFlow will often
 offer better performance in both GPUs and CPUs.
 Although it can be made to work with earlier versions, `PDFFlow` is only supported for TensorFlow>2.1.
 
+## PDF set management
+
+PDFFlow does not manage PDF sets itself; this is left to LHAPDF, so an LHAPDF installation is needed.
+A full LHAPDF installation can be obtained with the `lhapdf_management` library.
+
+```bash
+  python3 -m pip install lhapdf_management
+  lhapdf_management install NNPDF31_nnlo_as_0118
+```
+
 ## Minimal Working Example
 
 Below is a minimalistic example where `PDFFlow` is used to generate 10 values of the PDF

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 import re
 
 
-requirements = ['numpy', 'pyyaml']
+requirements = ['numpy', 'pyyaml', 'lhapdf_management']
 if version_info.major >=3 and version_info.minor >= 9:
     # For python above 3.9 the only existing TF is 2.5 which works well (even pre releases)
     tf_pack = "tensorflow"

diff --git a/src/pdfflow/configflow.py b/src/pdfflow/configflow.py
@@ -7,14 +7,15 @@
 import logging
 import subprocess as sp
 import numpy as np
+from lhapdf_management import pdf_install
+from lhapdf_management.configuration import environment as lhapdf_environment
 
 # Log levels
 LOG_DICT = {"0": logging.ERROR, "1": logging.WARNING, "2": logging.INFO, "3": logging.DEBUG}
 
 # Read the PDFFLOW environment variables
 _log_level_idx = os.environ.get("PDFFLOW_LOG_LEVEL")
 _data_path = os.environ.get("PDFFLOW_DATA_PATH")
-_lhapdf_data_path = os.environ.get("LHAPDF_DATA_PATH")
 _float_env = os.environ.get("PDFFLOW_FLOAT", "64")
 _int_env = os.environ.get("PDFFLOW_INT", "32")
 
@@ -105,39 +106,26 @@ def find_pdf_path(pdfname):
     all_paths = []
     if _data_path:
         all_paths.append(_data_path)
-    if _lhapdf_data_path:
-        all_paths.append(_lhapdf_data_path)
-    try:
-        import lhapdf
-
-        lhapdf_cmd = ["lhapdf-config", "--datadir"]
-        # Check the python version in order to use the right subprocess call
-        if sys.version_info.major == 3 and sys.version_info.minor < 7:
-            dirname_raw = sp.run(lhapdf_cmd, check=True, stdout=sp.PIPE)
-            dirname = dirname_raw.stdout.decode().strip()
-        else:
-            dirname_raw = sp.run(lhapdf_cmd, capture_output=True, text=True, check=True)
-            dirname = dirname_raw.stdout.strip()
-        all_paths.append(dirname)
-    except ModuleNotFoundError:
-        # If lhapdf is not installed, make a note and continue
-        lhapdf = None
+    all_paths.append(lhapdf_environment.datapath)
 
     # Return whatever path has the pdf inside
     for path in all_paths:
         if pathlib.Path(f"{path}/{pdfname}").exists():
             return path
 
-    # If none of them do, fail but inform the user
-    error_msg = f"The PDF set {pdfname} could not be found"
-    if lhapdf is not None:
-        error_msg += f"\nIt can be installed with ~$ lhapdf install {pdfname}"
-    elif _data_path is not None:
-        error_msg += f"\nPlease, download it and uncompress it in {_data_path}"
+    logger.warning("The PDF set %s could not be found in the system", pdfname)
+    yn = input("Do you want to try and install it automatically? [y/n]: ")
+    if yn.lower() in ("yes", "y"):
+        if not pdf_install(pdfname):
+            raise RuntimeError(f"Could not install {pdfname} in {lhapdf_environment.datapath}")
+
+    # If none of them do, ask for possible installation
+    if _data_path is not None:
+        error_msg = f"\nPlease, download the PDF and uncompress it in {_data_path}"
     elif _lhapdf_data_path is not None:
-        error_msg += f"\nPlease, download it and uncompress it in {_lhapdf_data_path}"
+        error_msg = f"\nPlease, download the PDf uncompress it in {_lhapdf_data_path}"
     else:
         error_msg += f"""
 Please, either download the set to an appropiate folder and make the environment variable
-PDFFLOW_DATA_PATH point to it or install the lhapdf python wrapper"""
-    raise ValueError(error_msg)
+PDFFLOW_DATA_PATH point to it or install with ``lhapdf_management install {pdfname}``"""
+    raise RuntimeError(error_msg)
diff --git a/src/pdfflow/pflow.py b/src/pdfflow/pflow.py
@@ -11,12 +11,15 @@
 import logging
 import collections
 import yaml
+from pathlib import Path
 
 import subprocess as sp
 import numpy as np
 
 import os, sys
 
+from lhapdf_management.pdfsets import PDF as LHA_PDF
+
 # import configflow before tf to set some tf options
 from pdfflow.configflow import DTYPE, DTYPEINT, int_me, izero, float_me, find_pdf_path
 import tensorflow as tf
@@ -34,44 +37,6 @@
 AlphaTuple = collections.namedtuple("Alpha", ["q2", "grid"])
 
 
-def _load_data(pdf_file):
-    """
-    Reads pdf from file and retrieves a list of grids
-    Each grid is a tuple containing numpy arrays (x,Q2, flavours, pdf)
-
-    Note:
-        the input q array in LHAPDF is just q, this functions
-        squares the result and q^2 is used everwhere in the code
-
-    Parameters
-    ----------
-        pdf_file: str
-            PDF .dat file
-
-    Returns
-    -------
-        grids: list(tuple(np.array))
-            list of tuples of arrays (x, Q2, flavours, pdf values)
-    """
-    with open(pdf_file, "r") as pfile:
-        n = []
-        count = 0
-        for line in pfile:
-            if "---" in line:
-                n += [count]
-            count += 1
-
-    grids = []
-    for i in range(len(n) - 1):
-        x = np.loadtxt(pdf_file, skiprows=(n[i] + 1), max_rows=1)
-        q2 = pow(np.loadtxt(pdf_file, skiprows=(n[i] + 2), max_rows=1), 2)
-        flav = np.loadtxt(pdf_file, skiprows=(n[i] + 3), max_rows=1)
-        grid = np.loadtxt(pdf_file, skiprows=(n[i] + 4), max_rows=(n[i + 1] - n[i] - 4))
-        grids += [GridTuple(x, q2, flav, grid)]
-
-    return grids
-
-
 def _load_alphas(info_file):
     """
     Reads metadata from info file and retrieves a list of alphas subgrids
@@ -186,11 +151,8 @@ def __init__(self, dirname, fname, members, compilable=True):
         self.dirname = dirname
         self.fname = fname
         self.grids = []
-        info_file = os.path.join(self.dirname, self.fname, f"{fname}.info")
-
-        # Load the info file
-        with open(info_file, "r") as ifile:
-            self.info = yaml.load(ifile, Loader=yaml.FullLoader)
+        lhapdf_pdf = LHA_PDF(Path(self.dirname) / fname)
+        self.info = lhapdf_pdf.info
 
         if members is None:
             total_members = self.info.get("NumMembers", 1)
@@ -202,12 +164,7 @@ def __init__(self, dirname, fname, members, compilable=True):
             logger.info("Loading %d members from %s", len(members), self.fname)
 
         for member_int in members:
-            member = str(member_int).zfill(4)
-            filename = os.path.join(self.dirname, fname, f"{fname}_{member}.dat")
-
-            logger.debug("Loading %s", filename)
-            grids = _load_data(filename)
-
+            grids = lhapdf_pdf.get_member_grids(member_int)
             subgrids = [Subgrid(grid, i, len(grids)) for i, grid in enumerate(grids)]
             self.grids.append(subgrids)
         self.members = members
@@ -240,24 +197,24 @@ def __init__(self, dirname, fname, members, compilable=True):
 
     @property
     def q2max(self):
-        """ Upper boundary in q2 of the first grid """
+        """Upper boundary in q2 of the first grid"""
         q2max = self.grids[0][-1].log_q2max
         return np.exp(q2max)
 
     @property
     def q2min(self):
-        """ Lower boundary in q2 of the first grid """
+        """Lower boundary in q2 of the first grid"""
         q2min = self.grids[0][0].log_q2min
         return np.exp(q2min)
 
     @property
     def nmembers(self):
-        """ Number of members for this PDF """
+        """Number of members for this PDF"""
         return len(self.members)
 
     @property
     def active_members(self):
-        """ List of all member files """
+        """List of all member files"""
         member_list = []
         for member_int in self.members:
             member = str(member_int).zfill(4)

diff --git a/src/pdfflow/tests/test_alphas.py b/src/pdfflow/tests/test_alphas.py
@@ -7,6 +7,8 @@
 import logging
 import subprocess as sp
 import numpy as np
+from lhapdf_management import pdf_install
+from lhapdf_management.configuration import environment
 import pdfflow.pflow as pdf
 from pdfflow.configflow import run_eager
 
@@ -26,15 +28,15 @@ def install_lhapdf(pdfset):
     try:
         lhapdf.mkPDF(pdfset)
     except RuntimeError:
-        sp.run(["lhapdf", "install", pdfset])
+        pdf_install(pdfset)
 
 
 SIZE = 200
 
 # Set up the PDF
-LIST_PDF = ["NNPDF31_nnlo_as_0118", "cteq6"]
+LIST_PDF = ["NNPDF31_nnlo_as_0118", "cteq61"]
 MEMBERS = 2
-DIRNAME = sp.run(["lhapdf-config", "--datadir"], stdout=sp.PIPE).stdout.strip().decode()
+DIRNAME = environment.datapath
 
 # Install the pdfs if they don't exist
 for pdfset in LIST_PDF:
@@ -45,12 +47,12 @@ def install_lhapdf(pdfset):
 
 # utilities
 def gen_q2(qmin, qmax):
-    """ generate an array of q2 between qmin and qmax """
+    """generate an array of q2 between qmin and qmax"""
     return np.random.rand(SIZE) * (qmax - qmin) + qmin
 
 
 def get_alphavals(q2arr, pdfset, sq2=False):
-    """ Generate an array of alphas(q) values from LHAPDF """
+    """Generate an array of alphas(q) values from LHAPDF"""
     lhapdf_pdf = lhapdf.mkPDF(pdfset)
     if sq2:
         return np.array([lhapdf_pdf.alphasQ2(iq) for iq in q2arr])
@@ -59,7 +61,7 @@ def get_alphavals(q2arr, pdfset, sq2=False):
 
 
 def test_accuracy_alphas(atol=1e-6):
-    """ Check the accuracy for all PDF sets for all members given
+    """Check the accuracy for all PDF sets for all members given
     when computing alpha_s given Q is compatible within atol
     between pdfflow and LHAPDF.
     This test runs eagerly
@@ -80,8 +82,9 @@ def test_accuracy_alphas(atol=1e-6):
                 np.testing.assert_allclose(flow_values, lhapdf_values, atol=atol)
     run_eager(False)
 
+
 def test_alphas_q2(atol=1e-6):
-    """ Check the accuracy for all PDF sets for all members given
+    """Check the accuracy for all PDF sets for all members given
     when computing alpha_s given Q is compatible within atol
     between pdfflow and LHAPDF
     This test does not run eagerly
@@ -100,8 +103,9 @@ def test_alphas_q2(atol=1e-6):
                 lhapdf_values = get_alphavals(q2arr, pdfset, sq2=True)
                 np.testing.assert_allclose(flow_values, lhapdf_values, atol=atol)
 
+
 def test_alpha_trace():
-    """ Check that the alpha_s can be traced and then instantiated """
+    """Check that the alpha_s can be traced and then instantiated"""
     # Ensure the functions are not run eagerly
     run_eager(False)
     setname = LIST_PDF[0]
@@ -114,6 +118,5 @@ def test_alpha_trace():
     pex2.alphas_trace()
 
 
-
 if __name__ == "__main__":
     test_alpha_trace()