-
Notifications
You must be signed in to change notification settings - Fork 5
/
utils.py
54 lines (45 loc) · 1.61 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import inspect
import collections
import pandas as pd
# See https://www.python-course.eu/python3_memoization.php
class Memoize:
    """Callable wrapper that caches the results of ``fn`` per distinct call.

    Calls are normalised through the wrapped function's signature, so
    ``f(1)``, ``f(1, 0)`` and ``f(1, b=0)`` share one cache entry when ``b``
    defaults to ``0``.

    Attributes:
        fn: The wrapped callable.
        memo: Dict mapping a normalised argument key to the cached result.

    NOTE: unhashable arguments (lists, numpy arrays, DataFrames, ...) are
    keyed by their ``str()`` form. numpy and pandas abbreviate the string
    form of large objects, so distinct large inputs can map to the same
    key -- avoid memoising on such arguments if that matters.
    """

    def __init__(self, fn):
        self.fn = fn
        self.memo = {}
        # The signature is fixed for the lifetime of the wrapper, so resolve
        # it once here instead of on every call.
        self._signature = inspect.signature(fn)

    @staticmethod
    def _key_part(value):
        """Return a hashable stand-in for ``value`` usable in a cache key."""
        try:
            # Actually hashing (rather than an isinstance check) also catches
            # containers such as a tuple that holds a list.
            hash(value)
        except TypeError:
            # Tag with the type so that e.g. [1, 2] and the string "[1, 2]"
            # do not share a cache entry.
            return (type(value), str(value))
        return value

    def __call__(self, *args, **kwargs):
        """Return the cached result for these arguments, computing it once.

        Raises:
            TypeError: If the arguments do not match the wrapped function's
                signature (same as calling the function directly).
        """
        # Bind positional AND keyword arguments, then fill in defaults, so
        # every equivalent spelling of a call yields the same bound values.
        bound = self._signature.bind(*args, **kwargs)
        bound.apply_defaults()
        call_args = bound.args
        call_kwargs = bound.kwargs  # keyword-only and **kwargs values

        key = (
            tuple(self._key_part(value) for value in call_args),
            # Sort by name so keyword order at the call site is irrelevant.
            tuple(
                (name, self._key_part(value))
                for name, value in sorted(
                    call_kwargs.items(), key=lambda item: item[0]
                )
            ),
        )
        if key not in self.memo:
            self.memo[key] = self.fn(*call_args, **call_kwargs)
        return self.memo[key]
def convertToOneHotWithPrespecifiedCategories(df, node, lower_bound, upper_bound):
    """One-hot encode column ``node`` of ``df`` over a fixed integer range.

    The categories are the integers from ``int(lower_bound)`` through
    ``int(upper_bound)`` inclusive, so the result always has one column per
    integer in that range (named ``<node>_<value>``), whether or not the
    value occurs in the data.

    Args:
        df: Object indexable by ``node`` (a DataFrame in practice).
        node: Column label to encode; also used as the dummy-column prefix.
        lower_bound: Smallest category value (truncated with ``int``).
        upper_bound: Largest category value (truncated with ``int``).

    Returns:
        The ``pd.get_dummies`` result for the categorical column.
    """
    column_values = df[node]
    levels = list(range(int(lower_bound), int(upper_bound) + 1))
    categorical_column = pd.Categorical(column_values, categories=levels)
    return pd.get_dummies(categorical_column, prefix=node)