Skip to content

Commit

Permalink
Merge pull request #57 from N3PDF/drop_lhapdf_dependency
Browse files Browse the repository at this point in the history
Remove LHAPDF non-python dependencies
  • Loading branch information
scarlehoff authored Sep 10, 2021
2 parents 415a8be + 269189c commit d8499f9
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 116 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ TensorFlow is updated frequently and a later version of TensorFlow will often
offer better performance in both GPUs and CPUs.
Although it can be made to work with earlier versions, `PDFFlow` is only supported for TensorFlow>2.1.

## PDF set management

PDFFlow does not manage PDF sets itself; this is left to LHAPDF, so an LHAPDF installation is needed.
A full lhapdf installation can be obtained by utilizing the `lhapdf_management` library.

```bash
python3 -m pip install lhapdf_management
lhapdf_management install NNPDF31_nnlo_as_0118
```

## Minimal Working Example

Below is a minimal example where `PDFFlow` is used to generate 10 values of the PDF
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re


requirements = ['numpy', 'pyyaml']
requirements = ['numpy', 'pyyaml', 'lhapdf_management']
if version_info.major >=3 and version_info.minor >= 9:
# For python above 3.9 the only existing TF is 2.5 which works well (even pre releases)
tf_pack = "tensorflow"
Expand Down
42 changes: 15 additions & 27 deletions src/pdfflow/configflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
import logging
import subprocess as sp
import numpy as np
from lhapdf_management import pdf_install
from lhapdf_management.configuration import environment as lhapdf_environment

# Log levels
LOG_DICT = {"0": logging.ERROR, "1": logging.WARNING, "2": logging.INFO, "3": logging.DEBUG}

# Read the PDFFLOW environment variables
_log_level_idx = os.environ.get("PDFFLOW_LOG_LEVEL")
_data_path = os.environ.get("PDFFLOW_DATA_PATH")
_lhapdf_data_path = os.environ.get("LHAPDF_DATA_PATH")
_float_env = os.environ.get("PDFFLOW_FLOAT", "64")
_int_env = os.environ.get("PDFFLOW_INT", "32")

Expand Down Expand Up @@ -105,39 +106,26 @@ def find_pdf_path(pdfname):
all_paths = []
if _data_path:
all_paths.append(_data_path)
if _lhapdf_data_path:
all_paths.append(_lhapdf_data_path)
try:
import lhapdf

lhapdf_cmd = ["lhapdf-config", "--datadir"]
# Check the python version in order to use the right subprocess call
if sys.version_info.major == 3 and sys.version_info.minor < 7:
dirname_raw = sp.run(lhapdf_cmd, check=True, stdout=sp.PIPE)
dirname = dirname_raw.stdout.decode().strip()
else:
dirname_raw = sp.run(lhapdf_cmd, capture_output=True, text=True, check=True)
dirname = dirname_raw.stdout.strip()
all_paths.append(dirname)
except ModuleNotFoundError:
# If lhapdf is not installed, make a note and continue
lhapdf = None
all_paths.append(lhapdf_environment.datapath)

# Return whatever path has the pdf inside
for path in all_paths:
if pathlib.Path(f"{path}/{pdfname}").exists():
return path

# If none of them do, fail but inform the user
error_msg = f"The PDF set {pdfname} could not be found"
if lhapdf is not None:
error_msg += f"\nIt can be installed with ~$ lhapdf install {pdfname}"
elif _data_path is not None:
error_msg += f"\nPlease, download it and uncompress it in {_data_path}"
logger.warning("The PDF set %s could not be found in the system", pdfname)
yn = input("Do you want to try and install it automatically? [y/n]: ")
if yn.lower() in ("yes", "y"):
if not pdf_install(pdfname):
raise RuntimeError(f"Could not install {pdfname} in {lhapdf_environment.datapath}")

# If none of them do, ask for possible installation
if _data_path is not None:
error_msg = f"\nPlease, download the PDF and uncompress it in {_data_path}"
elif _lhapdf_data_path is not None:
error_msg += f"\nPlease, download it and uncompress it in {_lhapdf_data_path}"
error_msg = f"\nPlease, download the PDf uncompress it in {_lhapdf_data_path}"
else:
error_msg += f"""
Please, either download the set to an appropiate folder and make the environment variable
PDFFLOW_DATA_PATH point to it or install the lhapdf python wrapper"""
raise ValueError(error_msg)
PDFFLOW_DATA_PATH point to it or install with ``lhapdf_management install {pdfname}``"""
raise RuntimeError(error_msg)
63 changes: 10 additions & 53 deletions src/pdfflow/pflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
import logging
import collections
import yaml
from pathlib import Path

import subprocess as sp
import numpy as np

import os, sys

from lhapdf_management.pdfsets import PDF as LHA_PDF

# import configflow before tf to set some tf options
from pdfflow.configflow import DTYPE, DTYPEINT, int_me, izero, float_me, find_pdf_path
import tensorflow as tf
Expand All @@ -34,44 +37,6 @@
AlphaTuple = collections.namedtuple("Alpha", ["q2", "grid"])


def _load_data(pdf_file):
    """
    Reads pdf from file and retrieves a list of grids
    Each grid is a tuple containing numpy arrays (x, Q2, flavours, pdf)

    Note:
        the input q array in LHAPDF is just q, this function
        squares the result and q^2 is used everywhere in the code

    The file is read from disk only once: every ``np.loadtxt`` call parses
    the in-memory list of lines instead of reopening and re-scanning the file.

    Parameters
    ----------
        pdf_file: str
            PDF .dat file

    Returns
    -------
        grids: list(tuple(np.array))
            list of tuples of arrays (x, Q2, flavours, pdf values)
    """
    with open(pdf_file, "r") as pfile:
        lines = pfile.readlines()

    # Line indices of the "---" separators delimiting the subgrids
    separators = [idx for idx, line in enumerate(lines) if "---" in line]

    grids = []
    # Each subgrid lives between two consecutive separators
    for start, end in zip(separators, separators[1:]):
        # The three lines following a separator hold x, q and the flavours;
        # everything else up to the next separator are the pdf values
        x = np.loadtxt(lines, skiprows=start + 1, max_rows=1)
        # q is stored in the file, q^2 is what is kept
        q2 = np.loadtxt(lines, skiprows=start + 2, max_rows=1) ** 2
        flav = np.loadtxt(lines, skiprows=start + 3, max_rows=1)
        grid = np.loadtxt(lines, skiprows=start + 4, max_rows=end - start - 4)
        grids.append(GridTuple(x, q2, flav, grid))

    return grids


def _load_alphas(info_file):
"""
Reads metadata from info file and retrieves a list of alphas subgrids
Expand Down Expand Up @@ -186,11 +151,8 @@ def __init__(self, dirname, fname, members, compilable=True):
self.dirname = dirname
self.fname = fname
self.grids = []
info_file = os.path.join(self.dirname, self.fname, f"{fname}.info")

# Load the info file
with open(info_file, "r") as ifile:
self.info = yaml.load(ifile, Loader=yaml.FullLoader)
lhapdf_pdf = LHA_PDF(Path(self.dirname) / fname)
self.info = lhapdf_pdf.info

if members is None:
total_members = self.info.get("NumMembers", 1)
Expand All @@ -202,12 +164,7 @@ def __init__(self, dirname, fname, members, compilable=True):
logger.info("Loading %d members from %s", len(members), self.fname)

for member_int in members:
member = str(member_int).zfill(4)
filename = os.path.join(self.dirname, fname, f"{fname}_{member}.dat")

logger.debug("Loading %s", filename)
grids = _load_data(filename)

grids = lhapdf_pdf.get_member_grids(member_int)
subgrids = [Subgrid(grid, i, len(grids)) for i, grid in enumerate(grids)]
self.grids.append(subgrids)
self.members = members
Expand Down Expand Up @@ -240,24 +197,24 @@ def __init__(self, dirname, fname, members, compilable=True):

@property
def q2max(self):
    """Upper boundary in q2, read from the last subgrid of the first loaded member"""
    last_subgrid = self.grids[0][-1]
    # The subgrid stores the logarithm of the boundary
    return np.exp(last_subgrid.log_q2max)

@property
def q2min(self):
    """Lower boundary in q2, read from the first subgrid of the first loaded member"""
    first_subgrid = self.grids[0][0]
    # The subgrid stores the logarithm of the boundary
    return np.exp(first_subgrid.log_q2min)

@property
def nmembers(self):
    """Number of members held by this PDF (length of ``self.members``)"""
    loaded_members = self.members
    return len(loaded_members)

@property
def active_members(self):
""" List of all member files """
"""List of all member files"""
member_list = []
for member_int in self.members:
member = str(member_int).zfill(4)
Expand Down
21 changes: 12 additions & 9 deletions src/pdfflow/tests/test_alphas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import logging
import subprocess as sp
import numpy as np
from lhapdf_management import pdf_install
from lhapdf_management.configuration import environment
import pdfflow.pflow as pdf
from pdfflow.configflow import run_eager

Expand All @@ -26,15 +28,15 @@ def install_lhapdf(pdfset):
try:
lhapdf.mkPDF(pdfset)
except RuntimeError:
sp.run(["lhapdf", "install", pdfset])
pdf_install(pdfset)


SIZE = 200

# Set up the PDF
LIST_PDF = ["NNPDF31_nnlo_as_0118", "cteq6"]
LIST_PDF = ["NNPDF31_nnlo_as_0118", "cteq61"]
MEMBERS = 2
DIRNAME = sp.run(["lhapdf-config", "--datadir"], stdout=sp.PIPE).stdout.strip().decode()
DIRNAME = environment.datapath

# Install the pdfs if they don't exist
for pdfset in LIST_PDF:
Expand All @@ -45,12 +47,12 @@ def install_lhapdf(pdfset):

# utilities
def gen_q2(qmin, qmax):
    """Draw an array of SIZE random values between qmin and qmax"""
    width = qmax - qmin
    samples = np.random.rand(SIZE)
    return samples * width + qmin


def get_alphavals(q2arr, pdfset, sq2=False):
""" Generate an array of alphas(q) values from LHAPDF """
"""Generate an array of alphas(q) values from LHAPDF"""
lhapdf_pdf = lhapdf.mkPDF(pdfset)
if sq2:
return np.array([lhapdf_pdf.alphasQ2(iq) for iq in q2arr])
Expand All @@ -59,7 +61,7 @@ def get_alphavals(q2arr, pdfset, sq2=False):


def test_accuracy_alphas(atol=1e-6):
""" Check the accuracy for all PDF sets for all members given
"""Check the accuracy for all PDF sets for all members given
when computing alpha_s given Q is compatible within atol
between pdfflow and LHAPDF.
This test run eagerly
Expand All @@ -80,8 +82,9 @@ def test_accuracy_alphas(atol=1e-6):
np.testing.assert_allclose(flow_values, lhapdf_values, atol=atol)
run_eager(False)


def test_alphas_q2(atol=1e-6):
""" Check the accuracy for all PDF sets for all members given
"""Check the accuracy for all PDF sets for all members given
when computing alpha_s given Q is compatible within atol
between pdfflow and LHAPDF
This test does not run eagerly
Expand All @@ -100,8 +103,9 @@ def test_alphas_q2(atol=1e-6):
lhapdf_values = get_alphavals(q2arr, pdfset, sq2=True)
np.testing.assert_allclose(flow_values, lhapdf_values, atol=atol)


def test_alpha_trace():
""" Check that the alpha_s can be traced and then instantiated """
"""Check that the alpha_s can be traced and then instantiated"""
# Ensure the functions are not run eagerly
run_eager(False)
setname = LIST_PDF[0]
Expand All @@ -114,6 +118,5 @@ def test_alpha_trace():
pex2.alphas_trace()



if __name__ == "__main__":
test_alpha_trace()
Loading

0 comments on commit d8499f9

Please sign in to comment.