From 134177650aa940e2b222ed9aaa0c027fbddfe100 Mon Sep 17 00:00:00 2001
From: Han Yang
Date: Thu, 5 Dec 2024 13:32:12 +0800
Subject: [PATCH 1/3] move the location of pretrained checkpoints and automatically download from github

---
Review notes (this section is ignored by `git am`):
- download_file() now streams the response to a temporary file and moves it
  into place with os.replace(). from_checkpoint() decides whether to download
  with os.path.exists(), so a truncated file left behind by an interrupted
  download would otherwise be picked up as a valid checkpoint on every
  later run.
- download_file() gained a `timeout` keyword (default 60 s); existing
  two-argument calls are unaffected.
- The existence check added to load() raises FileNotFoundError instead of
  using `assert`, which is stripped under `python -O`.
- NOTE(review): only the tail of load() is visible in this patch. If it
  still resolves the bundled checkpoints relative to the package directory,
  those files no longer exist there after the rename below - confirm.
- Hunk sizes and the diffstat were updated for the edits above; the `index`
  blob ids are unchanged from the original submission and are stale.

 .../mattersim-v1.0.0-1M.pth                   | Bin
 .../mattersim-v1.0.0-5M.pth                   | Bin
 src/mattersim/forcefield/potential.py         | 22 ++++---
 src/mattersim/utils/download_utils.py         | 66 +++++++++++++++++++
 4 files changed, 81 insertions(+), 7 deletions(-)
 rename {src/mattersim/pretrained_models => pretrained_models}/mattersim-v1.0.0-1M.pth (100%)
 rename {src/mattersim/pretrained_models => pretrained_models}/mattersim-v1.0.0-5M.pth (100%)
 create mode 100644 src/mattersim/utils/download_utils.py

diff --git a/src/mattersim/pretrained_models/mattersim-v1.0.0-1M.pth b/pretrained_models/mattersim-v1.0.0-1M.pth
similarity index 100%
rename from src/mattersim/pretrained_models/mattersim-v1.0.0-1M.pth
rename to pretrained_models/mattersim-v1.0.0-1M.pth
diff --git a/src/mattersim/pretrained_models/mattersim-v1.0.0-5M.pth b/pretrained_models/mattersim-v1.0.0-5M.pth
similarity index 100%
rename from src/mattersim/pretrained_models/mattersim-v1.0.0-5M.pth
rename to pretrained_models/mattersim-v1.0.0-5M.pth
diff --git a/src/mattersim/forcefield/potential.py b/src/mattersim/forcefield/potential.py
index fa98dea..ab87dac 100644
--- a/src/mattersim/forcefield/potential.py
+++ b/src/mattersim/forcefield/potential.py
@@ -28,6 +28,7 @@
 from mattersim.datasets.utils.build import build_dataloader
 from mattersim.forcefield.m3gnet.m3gnet import M3Gnet
 from mattersim.jit_compile_tools.jit import compile_mode
+from mattersim.utils.download_utils import download_checkpoint
 
 rank = int(os.getenv("RANK", 0))
 
@@ -863,23 +864,28 @@ def from_checkpoint(
         if model_name.lower() != "m3gnet":
             raise NotImplementedError
 
-        current_dir = os.path.dirname(__file__)
+        checkpoint_folder = os.path.expanduser("~/.local/mattersim/pretrained_models")
+        os.makedirs(checkpoint_folder, exist_ok=True)
         if (
             load_path is None
             or load_path.lower() == "mattersim-v1.0.0-1m.pth"
             or load_path.lower() == "mattersim-v1.0.0-1m"
         ):
-            load_path = os.path.join(
-                current_dir, "..", "pretrained_models/mattersim-v1.0.0-1M.pth"
-            )
+            load_path = os.path.join(checkpoint_folder, "mattersim-v1.0.0-1M.pth")
+            if not os.path.exists(load_path):
+                download_checkpoint(
+                    "mattersim-v1.0.0-1M.pth", save_folder=checkpoint_folder
+                )
             logger.info(f"Loading the pre-trained {os.path.basename(load_path)} model")
         elif (
             load_path.lower() == "mattersim-v1.0.0-5m.pth"
             or load_path.lower() == "mattersim-v1.0.0-5m"
         ):
-            load_path = os.path.join(
-                current_dir, "..", "pretrained_models/mattersim-v1.0.0-5M.pth"
-            )
+            load_path = os.path.join(checkpoint_folder, "mattersim-v1.0.0-5M.pth")
+            if not os.path.exists(load_path):
+                download_checkpoint(
+                    "mattersim-v1.0.0-5M.pth", save_folder=checkpoint_folder
+                )
             logger.info(f"Loading the pre-trained {os.path.basename(load_path)} model")
         else:
             logger.info("Loading the model from %s" % load_path)
@@ -979,6 +985,8 @@ def load(
             logger.info(f"Loading the pre-trained {os.path.basename(load_path)} model")
         else:
             logger.info("Loading the model from %s" % load_path)
+        if not os.path.exists(load_path):
+            raise FileNotFoundError(f"Model file {load_path} not found")
 
         checkpoint = torch.load(load_path, map_location=device)
 
diff --git a/src/mattersim/utils/download_utils.py b/src/mattersim/utils/download_utils.py
new file mode 100644
index 0000000..edccab8
--- /dev/null
+++ b/src/mattersim/utils/download_utils.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+"""
+This module contains utility functions for downloading files.
+"""
+import os
+
+import requests
+from loguru import logger
+
+
+def download_file(url: str, output_path: str, timeout: float = 60.0):
+    """
+    Download a file from a URL and save it to a local path.
+
+    The response body is streamed to a temporary file next to ``output_path``
+    and renamed into place only after the download has finished, so an
+    interrupted download never leaves a truncated file at ``output_path``.
+
+    Args:
+        url (str): The URL to download the file from.
+        output_path (str): The path to save the downloaded file to.
+        timeout (float): Seconds to wait for the server to connect or to
+            send data before the request is aborted.
+
+    Raises:
+        requests.RequestException: If the request fails or the server
+            answers with an error status.
+    """
+
+    logger.info(f"Downloading file from {url} to {output_path}")
+    # The PID suffix keeps concurrent downloads from sharing a temporary file.
+    temp_path = f"{output_path}.{os.getpid()}.part"
+    try:
+        with requests.get(url, stream=True, timeout=timeout) as response:
+            response.raise_for_status()  # Check if the request was successful
+            # dirname is "" for a bare file name, which os.makedirs rejects.
+            os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+            with open(temp_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=1024 * 1024):
+                    f.write(chunk)
+        os.replace(temp_path, output_path)
+    finally:
+        # Still present only when the download did not complete.
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+    logger.info(f"File downloaded to {output_path}")
+
+
+def download_checkpoint(
+    checkpoint_name: str, save_folder: str = "~/.local/mattersim/pretrained_models/"
+):
+    """
+    Download a checkpoint from the Microsoft Mattersim repository.
+
+    Args:
+        checkpoint_name (str): The name of the checkpoint to download.
+        save_folder (str): The local folder to save the checkpoint to.
+    """
+
+    GITHUB_CHECKPOINT_PREFIX = (
+        "https://raw.githubusercontent.com/microsoft/mattersim/main/pretrained_models/"
+    )
+    checkpoint_url = GITHUB_CHECKPOINT_PREFIX + checkpoint_name.strip("/")
+    save_path = os.path.join(
+        os.path.expanduser(save_folder), checkpoint_name.strip("/")
+    )
+    download_file(checkpoint_url, save_path)

From d90ea45901e1062d423bf7a9c930bdb0f36f0783 Mon Sep 17 00:00:00 2001
From: Han Yang
Date: Thu, 5 Dec 2024 13:42:34 +0800
Subject: [PATCH 2/3] update log information

---
Review notes (this section is ignored by `git am`):
- Content unchanged from the original submission.
- NOTE(review): the removed block set the stdlib logging level to CRITICAL
  on ranks other than 0. This series adds no equivalent configuration for
  the loguru logger that replaces it - confirm that log output from every
  rank is intended.

 src/mattersim/forcefield/potential.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/mattersim/forcefield/potential.py b/src/mattersim/forcefield/potential.py
index ab87dac..d95fff3 100644
--- a/src/mattersim/forcefield/potential.py
+++ b/src/mattersim/forcefield/potential.py
@@ -2,7 +2,6 @@
 """
 Potential
 """
-import logging
 import os
 import pickle
 import random
@@ -19,6 +18,7 @@
 from ase.constraints import full_3x3_to_voigt_6_stress
 from ase.units import GPa
 from deprecated import deprecated
+from loguru import logger
 from torch.optim import Adam
 from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
 from torch_ema import ExponentialMovingAverage
@@ -32,14 +32,6 @@
 
 rank = int(os.getenv("RANK", 0))
 
-if rank == 0:
-    logging.basicConfig(
-        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-    )
-else:
-    logging.basicConfig(level=logging.CRITICAL)
-logger = logging.getLogger(__name__)
-
 
 @compile_mode("script")
 class Potential(nn.Module):
@@ -873,6 +865,10 @@ def from_checkpoint(
         ):
             load_path = os.path.join(checkpoint_folder, "mattersim-v1.0.0-1M.pth")
             if not os.path.exists(load_path):
+                logger.info(
+                    "The pre-trained model is not found locally, "
+                    "attempting to download it from the server."
+                )
                 download_checkpoint(
                     "mattersim-v1.0.0-1M.pth", save_folder=checkpoint_folder
                 )
@@ -883,6 +879,10 @@ def from_checkpoint(
         ):
             load_path = os.path.join(checkpoint_folder, "mattersim-v1.0.0-5M.pth")
             if not os.path.exists(load_path):
+                logger.info(
+                    "The pre-trained model is not found locally, "
+                    "attempting to download it from the server."
+                )
                 download_checkpoint(
                     "mattersim-v1.0.0-5M.pth", save_folder=checkpoint_folder
                 )

From cfe75b6d27caba37b716524551ebac8642a46444 Mon Sep 17 00:00:00 2001
From: Han Yang
Date: Thu, 5 Dec 2024 13:49:40 +0800
Subject: [PATCH 3/3] change what files to include in package

---
Review notes (this section is ignored by `git am`):
- `include` and `exclude` take file patterns and do not act on directories,
  so `include src` and `exclude docs|pretrained_models|tests` had no effect.
  They are replaced with the directory commands `graft` and `prune`.
- `graft src` also restores src/mattersim/datasets/utils/*.pyx, whose
  explicit include line this patch removes.

 MANIFEST.in | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 82f39c1..1170410 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,22 @@
-include src/mattersim/pretrained_models/*.pth
-include src/mattersim/datasets/utils/*.pyx
+# Documentation files
+include CITATION.cff
+include CODE_OF_CONDUCT.md
+include CONTRIBUTING.md
+include LICENSE.txt
+include MODEL_CARD.md
+include README.md
+include SECURITY.md
+
+# Configuration files
+include MANIFEST.in
+include environment.yaml
+include pyproject.toml
+include setup.py
+
+# Source code
+graft src
+
+# Exclude directories
+prune docs
+prune pretrained_models
+prune tests