Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
OlgaGKononova committed Jun 25, 2021
1 parent dccab27 commit e0ca69a
Show file tree
Hide file tree
Showing 34 changed files with 7,246 additions and 0 deletions.
64 changes: 64 additions & 0 deletions example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@

import json

from text2chem.regex_parser import RegExParser
from text2chem.parser_pipeline import ParserPipelineBuilder
from text2chem.preprocessing_tools.additives_processing import AdditivesProcessing
from text2chem.preprocessing_tools.chemical_name_processing import ChemicalNameProcessing
from text2chem.preprocessing_tools.mixture_processing import MixtureProcessing
from text2chem.preprocessing_tools.phase_processing import PhaseProcessing
from text2chem.postprocessing_tools.substitute_additives import SubstituteAdditives


# Shared parser pipeline used by the checks below.  Four pre-processing
# steps (additives, chemical names, phases, mixtures) and one post-processing
# step (additive substitution) are registered before the regex parser is set.
mp = (
    ParserPipelineBuilder()
    .add_preprocessing(AdditivesProcessing)
    .add_preprocessing(ChemicalNameProcessing)
    .add_preprocessing(PhaseProcessing)
    .add_preprocessing(MixtureProcessing)
    .add_postprocessing(SubstituteAdditives)
    .set_regex_parser(RegExParser)
    .build()
)


def run_test(testdata):
    """Parse every fixture entry and print the names whose output differs.

    Each item of ``testdata`` is indexed with ``"material"`` (the string
    handed to the module-level pipeline ``mp``) and ``"parser_output"``
    (a sequence whose first element is the expected dictionary).  Nothing
    is returned; a material name is printed only when the parsed result,
    converted with ``to_dict()``, is not equal to the expected value.
    """
    for entry in testdata:
        material = entry["material"]
        expected = entry["parser_output"][0]
        parsed = mp.parse(material).to_dict()
        if expected != parsed:
            # Mismatch: report the offending material string.
            print(material)

"""
test formulas
"""
# Every fixture file is opened in a ``with`` block so its handle is closed as
# soon as the JSON has been read, and it is decoded explicitly as UTF-8 (the
# standard JSON encoding) instead of relying on the platform default.
testdata_fn = "tests/formulas.json"
with open(testdata_fn, encoding="utf-8") as testdata_file:
    testdata = json.load(testdata_file)
run_test(testdata)

"""
test additives
"""
testdata_fn = "tests/additives.json"
with open(testdata_fn, encoding="utf-8") as testdata_file:
    testdata = json.load(testdata_file)
run_test(testdata)

"""
test chemical names
"""
testdata_fn = "tests/chemical_names.json"
with open(testdata_fn, encoding="utf-8") as testdata_file:
    testdata = json.load(testdata_file)
run_test(testdata)

"""
test mixtures: alloys, solid solutions, composites
"""
testdata_fn = "tests/mixtures.json"
with open(testdata_fn, encoding="utf-8") as testdata_file:
    testdata = json.load(testdata_file)
run_test(testdata)

"""
test phases
"""
testdata_fn = "tests/phases.json"
with open(testdata_fn, encoding="utf-8") as testdata_file:
    testdata = json.load(testdata_file)
run_test(testdata)
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
regex
pubchempy
sympy
17 changes: 17 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from setuptools import setup, find_packages

# Package metadata and installation settings for the text2chem distribution.
setup(
    # Identity
    name="text2chem",
    version="0.0.1",
    description="RegEx-based text parser that converts chemical terms and material entities into chemical datastructure.",
    author="Ceder Research Group",
    author_email="cedergroup-ml-team@lbl.gov",
    # Contents
    packages=find_packages(),
    include_package_data=True,
    zip_safe=False,
    # Runtime dependencies
    install_requires=[
        "regex",
        "pubchempy",
        "sympy",
    ],
)
Loading

0 comments on commit e0ca69a

Please sign in to comment.