Skip to content

Commit

Permalink
Issue 144 - remove langchain (#145)
Browse files Browse the repository at this point in the history
* remove langchain dependency

* remove langchain from setup.py

* Fixed tests

* Updated CI

* Updated changelog

* Bumped version

* Relaxed requirements

---------

Co-authored-by: Andrew White <white.d.andrew@gmail.com>
  • Loading branch information
geemi725 and whitead authored Nov 22, 2024
1 parent 89d7626 commit 40f5089
Show file tree
Hide file tree
Showing 13 changed files with 54 additions and 49 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python "3.8"
- name: Set up Python "3.11"
uses: actions/setup-python@v2
with:
python-version: "3.8"
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v2
with:
python-version: '3.8'
python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/paper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v2
with:
python-version: "3.8"
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, "3.10", "3.11"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 4 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Change Log
==========

v3.1.0 (2024-11-21)
-------------------
* Removed the langchain dependency and switched to using the OpenAI API directly

v3.0.3 (2023-06-19)
-------------------
* Now compatible with python 3.11
Expand Down
30 changes: 20 additions & 10 deletions exmol/exmol.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@
from rdkit.Chem.Draw import MolToImage as mol2img, DrawMorganBit # type: ignore
from rdkit.Chem import rdchem # type: ignore
from rdkit.DataStructs.cDataStructs import BulkTanimotoSimilarity, TanimotoSimilarity # type: ignore
import langchain.llms as llms
import langchain.prompts as prompts

import openai
from . import stoned
from .plot_utils import _mol_images, _image_scatter, _bit2atoms
from .data import *
Expand Down Expand Up @@ -392,6 +391,7 @@ def _check_alphabet_consistency(
alphabet_symbols = _alphabet_to_elements(set(alphabet_symbols))
# find all elements in smiles (Upper alpha or upper alpha followed by lower alpha)
smiles_symbols = set(re.findall(r"[A-Z][a-z]?", smiles))

if check and not smiles_symbols.issubset(alphabet_symbols):
# show which symbols are not in alphabet
raise ValueError(
Expand Down Expand Up @@ -1410,7 +1410,7 @@ def merge_text_explains(
def text_explain_generate(
text_explanations: List[Tuple[str, float]],
property_name: str,
llm: Optional[llms.BaseLLM] = None,
llm_model: str = "gpt-4o",
single: bool = True,
) -> str:
"""Insert text explanations into template, and generate explanation.
Expand All @@ -1430,14 +1430,24 @@ def text_explain_generate(
for x in text_explanations
]
)
prompt_template = prompts.PromptTemplate(
input_variables=["property", "text"],
template=_single_prompt if single else _multi_prompt,
)

prompt_template = _single_prompt if single else _multi_prompt
prompt = prompt_template.format(property=property_name, text=text)
if llm is None:
llm = llms.OpenAI(temperature=0.05)
return llm(prompt)

messages = [
{
"role": "system",
"content": "Your goal is to explain which molecular features are important to its properties based on the given text.",
},
{"role": "user", "content": prompt},
]
response = openai.chat.completions.create(
model=llm_model,
messages=messages,
temperature=0.05,
)

return response.choices[0].message.content


def text_explain(
Expand Down
2 changes: 1 addition & 1 deletion exmol/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.0.4"
__version__ = "3.1.0"
4 changes: 2 additions & 2 deletions paper1_CFs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mordred[full]==1.2.0
scikit-learn==1.1.2
mordred[full]
scikit-learn
jupyter
seaborn
pandas
Expand Down
20 changes: 8 additions & 12 deletions paper2_LIME/RF-lime.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,14 @@
"import numpy as np\n",
"import mordred, mordred.descriptors\n",
"from mordred import HydrogenBond, Polarizability\n",
"from mordred import SLogP, AcidBase, BertzCT, Aromatic, BondCount, AtomCount\n",
"from mordred import SLogP, AcidBase, Aromatic, BondCount, AtomCount\n",
"from mordred import Calculator\n",
"\n",
"import exmol as exmol\n",
"from rdkit.Chem.Draw import rdDepictor\n",
"import os\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import roc_auc_score, plot_roc_curve\n",
"\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
"rdDepictor.SetPreferCoordGen(True)\n",
Expand All @@ -50,6 +49,9 @@
"soldata = pd.read_csv(\n",
" \"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv\"\n",
")\n",
"#drop smile with containing 'P'\n",
"soldata = soldata[soldata[\"SMILES\"].str.contains(\"P\") == False]\n",
"\n",
"features_start_at = list(soldata.columns).index(\"MolWt\")"
]
},
Expand Down Expand Up @@ -97,7 +99,8 @@
"outputs": [],
"source": [
"raw_features = np.array(raw_features)\n",
"labels = soldata[\"Solubility\"]"
"labels = soldata[\"Solubility\"]\n",
"print(len(labels)==len(molecules))"
]
},
{
Expand Down Expand Up @@ -197,7 +200,7 @@
"metadata": {},
"outputs": [],
"source": [
"smi = soldata.SMILES[1500]\n",
"smi = soldata.SMILES[150]\n",
"stoned_kwargs = {\n",
" \"num_samples\": 2000,\n",
" \"alphabet\": exmol.get_basic_alphabet(),\n",
Expand Down Expand Up @@ -275,13 +278,6 @@
"plt.gca().invert_yaxis()\n",
"plt.title(\"Random Forest Regression\", fontsize=12)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -303,7 +299,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.11"
"version": "3.11.10"
}
},
"nbformat": 4,
Expand Down
9 changes: 2 additions & 7 deletions paper2_LIME/Solubility-RNN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,15 @@
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.patches import Rectangle, FancyBboxPatch\n",
"from matplotlib.offsetbox import AnnotationBbox\n",
"import seaborn as sns\n",
"import skunk\n",
"import matplotlib as mpl\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import selfies as sf\n",
"import exmol\n",
"from dataclasses import dataclass\n",
"from rdkit.Chem.Draw import rdDepictor, MolsToGridImage\n",
"from rdkit.Chem import MolFromSmiles, MACCSkeys\n",
"from rdkit.Chem import MolFromSmiles\n",
"import random\n",
"\n",
"\n",
"rdDepictor.SetPreferCoordGen(True)\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.font_manager as font_manager\n",
Expand Down Expand Up @@ -66,6 +60,7 @@
"soldata = pd.read_csv(\n",
" \"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv\"\n",
")\n",
"\n",
"features_start_at = list(soldata.columns).index(\"MolWt\")\n",
"np.random.seed(0)\n",
"random.seed(0)"
Expand Down
4 changes: 2 additions & 2 deletions paper2_LIME/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mordred[full]==1.2.0
scikit-learn==1.1.2
mordred[full]
scikit-learn
jupyter
seaborn
pandas
Expand Down
14 changes: 7 additions & 7 deletions paper3_Scents/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
pyrfume
tensorflow==2.5.0
tensorflow>2.5.0
seaborn
jaxlib==0.1.67
jax==0.2.13
jaxlib
jax
pandas
dm-haiku==0.0.5
chex==0.0.7
optax==0.0.9
dm-haiku
chex
optax
matplotlib
scikit-learn==1.1.2
scikit-learn
jupyter
CairoSVG
Pillow
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"skunk >= 0.4.0",
"importlib-resources",
"synspace",
"langchain==0.0.343",
"openai",
],
test_suite="tests",
long_description=long_description,
Expand Down

0 comments on commit 40f5089

Please sign in to comment.