Skip to content

Commit

Permalink
Merge pull request #12 from INGEOTEC/develop
Browse files Browse the repository at this point in the history
Version - 0.1.2
  • Loading branch information
mgraffg authored Jul 24, 2023
2 parents b4012dd + 266f3a4 commit cacbc74
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 16 deletions.
2 changes: 1 addition & 1 deletion IngeoDash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# limitations under the License.
from IngeoDash.annotate import label_column, flip_label, store, similarity

__version__ = '0.1.1'
__version__ = '0.1.2'
11 changes: 6 additions & 5 deletions IngeoDash/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def has_label(mem: Config, x):
return False


def model(mem: Config, data: dict, select: bool=True):
def model(mem: Config, data: dict):
lang = mem[mem.lang]
if lang not in CONFIG.denseBoW:
dense = DenseBoW(lang=lang, voc_size_exponent=mem.voc_size_exponent,
Expand All @@ -41,7 +41,7 @@ def model(mem: Config, data: dict, select: bool=True):
n_jobs=mem.n_jobs,
dataset=False, emoji=False, keyword=False)
dense.text_representations_extend(CONFIG.denseBoW[lang])
if select:
if mem.dense_select:
dense.select(D=data)
_ = np.unique([x[mem.label_header] for x in data],
return_counts=True)[1]
Expand All @@ -57,7 +57,6 @@ def model(mem: Config, data: dict, select: bool=True):
return stack.fit(data)



def active_learning_selection(mem: Config):
db = CONFIG.db[mem[mem.username]]
dense = model(mem, db[mem.permanent])
Expand All @@ -79,10 +78,11 @@ def active_learning_selection(mem: Config):
data = []
for cnt, i in enumerate(index):
ele = D.pop(i - cnt)
ele[mem.label_header] = klasses[cnt]
ele[mem.label_header] = ele.get(mem.label_header, klasses[cnt])
data.append(ele)
db[mem.original] = D
db[mem.data] = data
return dense


def label_column_predict(mem: Config, model=None):
Expand All @@ -96,7 +96,8 @@ def label_column_predict(mem: Config, model=None):
dense = model(mem, D)
hys = dense.predict(data).tolist()
for ele, hy in zip(data, hys):
ele[mem.label_header] = ele.get(mem.label_header, hy)
ele[mem.label_header] = ele.get(mem.label_header, hy)
return dense


def label_column(mem: Config, model=model):
Expand Down
6 changes: 5 additions & 1 deletion IngeoDash/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class Config:
voc_selection: str='most_common_by_type'
estimator_class: object=LinearSVC
decision_function_name: str='decision_function'
dense_select: bool=True


def __getitem__(self, key):
Expand All @@ -65,7 +66,10 @@ def __call__(self, value):
cls = deepcopy(self)
if value is not None:
cls.mem = json.loads(value) if isinstance(value, str) else value
for key in ['label_header', 'text', 'n_value']:
for key in ['label_header', 'text', 'n_value',
'voc_size_exponent', 'voc_selection',
'estimator_class', 'decision_function_name',
'dense_select']:
if key in cls.mem:
setattr(cls, key, cls.mem[key])
return cls
Expand Down
15 changes: 12 additions & 3 deletions IngeoDash/tests/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from IngeoDash.annotate import label_column, flip_label, store, similarity
from IngeoDash.annotate import label_column, flip_label, store, similarity, model
from IngeoDash.config import CONFIG
from microtc.utils import tweet_iterator
from EvoMSA.tests.test_base import TWEETS
Expand Down Expand Up @@ -91,7 +91,6 @@ def test_predict_active_learning():
assert [x['id'] for x in data] != list(range(10, 20))



def test_flip_label():
data = [dict() for i in range(3)]
mem = CONFIG({CONFIG.username: 'xxx'})
Expand Down Expand Up @@ -125,4 +124,14 @@ def test_similarity():
_ = sorted([[tweet['nn'], sim]for tweet, (sim, ) in zip(tweets, sim_values)],
key=lambda x: x[1],
reverse=True)
assert 'Me choca ahorita' in _[0][0]
assert 'Me choca ahorita' in _[0][0]


def test_stack_dense():
    """Check the type of the object returned by `model` for two training sizes.

    The model is built for Spanish (`CONFIG.lang` set to 'es') using the
    tweets read from `TWEETS`.
    """
    from EvoMSA import DenseBoW, StackGeneralization
    mem = CONFIG({CONFIG.lang: 'es'})
    D = list(tweet_iterator(TWEETS))
    # Trained on only the first 15 tweets, the result is expected to be a
    # DenseBoW that is not a StackGeneralization.
    m = model(mem, D[:15])
    # Separate asserts so a failure reports which condition was violated.
    assert isinstance(m, DenseBoW)
    assert not isinstance(m, StackGeneralization)
    # Trained on every tweet, the result is expected to be a
    # StackGeneralization.
    m = model(mem, D)
    assert isinstance(m, StackGeneralization)
23 changes: 17 additions & 6 deletions IngeoDash/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
from IngeoDash.config import Config
from IngeoDash.config import CONFIG
from sklearn.svm import LinearSVC


def test_Config():
Expand Down Expand Up @@ -45,7 +46,12 @@ def test_Config():
checklist='checklist',
active_learning='active_learning',
shuffle='shuffle',
labels_proportion='labels_proportion')
labels_proportion='labels_proportion',
voc_size_exponent=15,
voc_selection='most_common_by_type',
estimator_class=LinearSVC,
decision_function_name='decision_function',
dense_select=True)
for k, v in default.items():
assert v == getattr(conf, k)

Expand All @@ -70,11 +76,16 @@ def test_Config_call():


def test_Config_call2():
    """Check that calling CONFIG with a dict exposes each given key as an attribute.

    Every key/value pair passed in is expected to be readable back from
    the returned object through `getattr`.
    """
    # One table of overrides drives both the call and the assertions, so
    # adding a key here automatically extends the check.
    kwargs = dict(label_header='label',
                  text='texto', n_value=12,
                  voc_size_exponent=15,
                  voc_selection='most_common_by_type',
                  estimator_class=LinearSVC,
                  decision_function_name='decision_function',
                  dense_select=True)
    mem = CONFIG(kwargs)
    for k, v in kwargs.items():
        assert getattr(mem, k) == v


def test_CONFIG():
Expand Down

0 comments on commit cacbc74

Please sign in to comment.