Skip to content

Commit

Permalink
Added possibility to replace scientific notation
Browse files Browse the repository at this point in the history
  • Loading branch information
charon25 committed Apr 21, 2023
1 parent d22b540 commit 3ca506d
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 10 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="shunting-yard",
version="1.0.9",
version="1.0.10",
author="Paul 'charon25' Kern",
description="Compute any math expression",
long_description=long_description,
Expand All @@ -14,5 +14,5 @@
url="https://www.github.com/charon25/ShuntingYard",
license="MIT",
packages=['shunting_yard'],
download_url="https://github.com/charon25/ShuntingYard/archive/refs/tags/v1.0.9.tar.gz"
download_url="https://github.com/charon25/ShuntingYard/archive/refs/tags/v1.0.10.tar.gz"
)
8 changes: 8 additions & 0 deletions shunting_yard/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,11 @@

# Separator characters: opening bracket, comma and semicolon; SEPARATORS additionally contains the closing bracket
SEPARATORS_NO_CLOSING_BRACKET = '(,;'
SEPARATORS = SEPARATORS_NO_CLOSING_BRACKET + ')'


# Group 1: a run of digits starting at a word boundary
# Group 2: the next character, which must not be ')', a digit, '.', ',', ';', '+', '*', '/', '^' or '-'
IMPLICIT_MULTIPLICATION_NUMBER_REGEX = r'\b(\d+)([^)\d.,;+*\/^-])'
# Group 1: a closing bracket
# Group 2: the next character, which must not be ')', ',', ';', '+', '*', '/', '^' or '-'
IMPLICIT_MULTIPLICATION_BRACKET_REGEX = r'(\))([^),;+*\/^-])'


# Both patterns match "<mantissa>e<exponent>" with a lowercase 'e' and an optionally signed integer exponent (group 2)
# Mantissa (group 1) here: optional sign, optional digits, optional dot, then at least one digit (e.g. "12", "1.2", ".2")
SCIENTIFIC_NOTATION_AFTER_DOT_REGEX = r'([+-]?\d*\.?\d+)e([+-]?\d+)'
# Mantissa (group 1) here: optional sign, at least one digit, optional dot, then optional digits (e.g. "12", "1.2", "1.")
SCIENTIFIC_NOTATION_BEFORE_DOT_REGEX = r'([+-]?\d+\.?\d*)e([+-]?\d+)'
9 changes: 5 additions & 4 deletions shunting_yard/shunting_yard.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Associativity(Enum):


# Reference : https://en.wikipedia.org/wiki/Shunting_yard_algorithm
def shunting_yard(expression: str, case_sensitive: bool = True, variable: Optional[str] = None) -> str:
def shunting_yard(expression: str, case_sensitive: bool = True, variable: Optional[str] = None, convert_scientific_notation: bool = True) -> str:
"""Convert the given classical math expression into Reverse Polish Notation using the Shunting-yard algorithm (see https://en.wikipedia.org/wiki/Shunting_yard_algorithm for more details). All whitespace is ignored.
Expand All @@ -56,8 +56,9 @@ def shunting_yard(expression: str, case_sensitive: bool = True, variable: Option
Args:
expression (str): string containing the mathematical expression to convert.
case_sensitive (bool): indicates whether the expression should care about case.
variable (str, optional): if defined, will treat every token matching the variable as a number.
case_sensitive (bool): indicates whether the expression should care about case (default: True).
variable (str, optional): if defined, will treat every token matching the variable as a number (default: None).
convert_scientific_notation (bool, optional): indicates whether the expression should convert scientific notation (e.g. 1.23e4 to 1.23*10^4) (default: True).
Raises:
MismatchedBracketsError: raised if the brackets are unbalanced.
Expand All @@ -72,7 +73,7 @@ def shunting_yard(expression: str, case_sensitive: bool = True, variable: Option
if not case_sensitive:
expression = expression.lower()

for token in tokenize(expression):
for token in tokenize(expression, convert_scientific_notation=convert_scientific_notation):
first_char = token[0]

if first_char in NUMBER_CHARS or token == variable:
Expand Down
23 changes: 20 additions & 3 deletions shunting_yard/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Iterator

from shunting_yard.constants import BASE_OPERATORS, FUNCTION_CHARS, FUNCTION_FIRST_CHARS, NUMBER_CHARS, SEPARATORS, UNARY_OPERATORS
from shunting_yard.constants import IMPLICIT_MULTIPLICATION_BRACKET_REGEX, IMPLICIT_MULTIPLICATION_NUMBER_REGEX, SCIENTIFIC_NOTATION_AFTER_DOT_REGEX, SCIENTIFIC_NOTATION_BEFORE_DOT_REGEX



Expand All @@ -18,18 +19,34 @@ def _remove_implicit_multiplication(expression: str) -> str:
"""

# Insert '*' between a number and anything other than a digit, an operation, a closing bracket, a decimal dot, a function parameters separator
expression = re.sub(r'\b(\d+)([^)\d.,;+*\/^-])', r'\1*\2', expression)
expression = re.sub(IMPLICIT_MULTIPLICATION_NUMBER_REGEX, r'\1*\2', expression)
# Insert '*' between a closing bracket and anything other than an operation, another closing bracket, a function parameters separator
expression = re.sub(r'(\))([^),;+*\/^-])', r'\1*\2', expression)
expression = re.sub(IMPLICIT_MULTIPLICATION_BRACKET_REGEX, r'\1*\2', expression)
return expression


def tokenize(string: str) -> Iterator[str]:
def _convert_scientific_notation(expression: str) -> str:
    """Rewrite scientific notation as an explicit multiplication by a power of ten.

    Every occurrence of the form "xey" (lowercase 'e', y an optionally signed integer) is replaced
    by "x*10^(y)", for instance "12e-3" becomes "12*10^(-3)". Anything else is left as is.

    Args:
        expression (str): string containing the mathematical expression to rewrite.

    Returns:
        str: the expression with every recognised scientific notation expanded.
    """

    power_of_ten = r'\1*10^(\2)'

    # Two passes with the same replacement:
    #  - the first pattern takes mantissas ending with a digit ("12", "1.2", ".2")
    #  - the second one takes mantissas starting with a digit, which adds those ending with just a dot ("1.")
    patterns = (SCIENTIFIC_NOTATION_AFTER_DOT_REGEX, SCIENTIFIC_NOTATION_BEFORE_DOT_REGEX)

    converted = expression
    for pattern in patterns:
        converted = re.sub(pattern, power_of_ten, converted)

    return converted


def tokenize(string: str, convert_scientific_notation: bool = True) -> Iterator[str]:
if string == '':
return

# Remove all whitespace, as it does not change anything
string = ''.join(string.split())

if convert_scientific_notation:
string = _convert_scientific_notation(string)

string = _remove_implicit_multiplication(string)

cursor = 0
Expand Down
27 changes: 26 additions & 1 deletion tests/test_tokenize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest

from shunting_yard import tokenize
from shunting_yard.tokenize import _remove_implicit_multiplication
from shunting_yard.tokenize import _convert_scientific_notation, _remove_implicit_multiplication


class TestTokenizer(unittest.TestCase):
Expand Down Expand Up @@ -106,6 +106,7 @@ def test_digit_other(self):
self.assertEqual(_remove_implicit_multiplication('3cos(5)'), '3*cos(5)')
self.assertEqual(_remove_implicit_multiplication('3_func(0)'), '3*_func(0)')
self.assertEqual(_remove_implicit_multiplication('1(2+3)'), '1*(2+3)')
self.assertEqual(_remove_implicit_multiplication('1(2(3(4(5(6(7(8(9(10+0)))))))))'), '1*(2*(3*(4*(5*(6*(7*(8*(9*(10+0)))))))))')

def test_implicit_mult_double_brackets(self):
self.assertEqual(_remove_implicit_multiplication('(1+2)(3+4)'), '(1+2)*(3+4)')
Expand All @@ -127,6 +128,30 @@ def test_with_longer_numbers(self):
self.assertEqual(_remove_implicit_multiplication('1+200x'), '1+200*x')


class TestScientificNotation(unittest.TestCase):
    """Tests of the conversion of scientific notation into an explicit power of ten."""

    def _check_all(self, cases):
        # Each case is a pair (input expression, expected converted expression)
        for source, expected in cases:
            self.assertEqual(_convert_scientific_notation(source), expected)

    def test_not_present(self):
        # Without any scientific notation, the expression must come back unchanged
        unchanged = ('abc', '123.4', '123exp(4)', '123e +1')
        self._check_all([(text, text) for text in unchanged])

    def test_present(self):
        self._check_all([
            # Integer mantissa, with and without sign
            ('12e3', '12*10^(3)'),
            ('+12e3', '+12*10^(3)'),
            ('-12e3', '-12*10^(3)'),
            # Decimal mantissa, including a bare trailing or leading dot
            ('1.2e3', '1.2*10^(3)'),
            ('1.e3', '1.*10^(3)'),
            ('.2e3', '.2*10^(3)'),
            # Signed and multi-digit exponents
            ('12e-3', '12*10^(-3)'),
            ('12e+3', '12*10^(+3)'),
            ('12e34', '12*10^(34)'),
            ('12e-34', '12*10^(-34)'),
            ('12e+34', '12*10^(+34)'),
        ])

    def test_double(self):
        # Several occurrences in the same expression are all converted
        self._check_all([
            ('12e3+45e-6', '12*10^(3)+45*10^(-6)'),
        ])


if __name__ == '__main__':
unittest.main()

0 comments on commit 3ca506d

Please sign in to comment.