Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/entity threshold filter #62

Merged
merged 11 commits into from
Jul 11, 2021
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 0.7.1
- [x] [fix](https://github.com/Vernacular-ai/dialogy/issues/60): Entity scoring within `EntityExtractor` and `DucklingPlugin`.
- [x] [fix](https://github.com/Vernacular-ai/dialogy/issues/58): CurrencyEntity added to operate on `amount-of-money` dimension.
- [x] add: Support for `TimeIntervalEntity` inputs with a hybrid structure, where some of the values resemble `TimeEntity` values.

# 0.7.0
- [x] add: `KeywordEntity` entity-type class.
- [x] refactor: `ListEntityPlugin` doesn't need an entity map. Uses `KeywordEntity` instead.
Expand Down
14 changes: 8 additions & 6 deletions dialogy/base/entity_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,17 @@ def aggregate_entities(

def apply_filters(self, entities: List[BaseEntity]) -> List[BaseEntity]:
"""
[summary]
Conditionally remove entities.

:param entities: [description]
:param entities: A list of entities.
:type entities: List[BaseEntity]
:return: [description]
:return: A list of entities. This can be at most the same length as `entities`.
:rtype: List[BaseEntity]
"""
return self.remove_low_scoring_entities(entities)

@staticmethod
def entity_consensus(
entities: List[BaseEntity], input_size: int
self, entities: List[BaseEntity], input_size: int
) -> List[BaseEntity]:
"""
Combine entities by type and value.
Expand All @@ -108,4 +107,7 @@ def entity_consensus(
entity_type_value_group = py_.group_by(
entities, lambda entity: (entity.type, entity.get_value())
)
return EntityExtractor.aggregate_entities(entity_type_value_group, input_size)
aggregate_entities = EntityExtractor.aggregate_entities(
entity_type_value_group, input_size
)
return self.apply_filters(aggregate_entities)
13 changes: 9 additions & 4 deletions dialogy/plugins/preprocess/text/duckling_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def select_datetime(

def apply_filters(self, entities: List[BaseEntity]) -> List[BaseEntity]:
"""
Filter entities by configurable criteria.
Conditionally remove entities.

The utility of this method is tracked here:
https://github.com/Vernacular-ai/dialogy/issues/42
Expand All @@ -261,7 +261,9 @@ def apply_filters(self, entities: List[BaseEntity]) -> List[BaseEntity]:
:rtype: List[BaseEntity]
"""
if self.datetime_filters:
entities = self.select_datetime(entities, self.datetime_filters)
return self.select_datetime(entities, self.datetime_filters)

# We call the filters that exist on the EntityExtractor class like threshold filtering.
return super().apply_filters(entities)

@dbg(log)
Expand Down Expand Up @@ -396,7 +398,10 @@ def utility(self, *args: Any) -> List[BaseEntity]:
for (alternative_index, entities) in enumerate(list_of_entities):
shaped_entities.append(self._reshape(entities, alternative_index))

filtered_entities = self.apply_filters(py_.flatten(shaped_entities))
return EntityExtractor.entity_consensus(filtered_entities, input_size)
shaped_entities_flattened = py_.flatten(shaped_entities)
aggregate_entities = self.entity_consensus(
shaped_entities_flattened, input_size
)
return self.apply_filters(aggregate_entities)
except ValueError as value_error:
raise ValueError(str(value_error)) from value_error
4 changes: 2 additions & 2 deletions dialogy/plugins/preprocess/text/list_entity_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ def get_entities(self, transcripts: List[str]) -> List[BaseEntity]:
log.debug("Parsed entities")
log.debug(entities)

filtered_entities = self.apply_filters(entities)
return EntityExtractor.entity_consensus(filtered_entities, len(transcripts))
aggregated_entities = self.entity_consensus(entities, len(transcripts))
return self.apply_filters(aggregated_entities)

@dbg(log)
def utility(self, *args: Any) -> Any:
Expand Down
2 changes: 2 additions & 0 deletions dialogy/types/entity/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

from dialogy.types.entity.base_entity import BaseEntity, entity_synthesis
from dialogy.types.entity.currency_entity import CurrencyEntity
from dialogy.types.entity.duration_entity import DurationEntity
from dialogy.types.entity.keyword_entity import KeywordEntity
from dialogy.types.entity.location_entity import LocationEntity
Expand All @@ -19,4 +20,5 @@
"people": {"value": PeopleEntity},
"time": {"value": TimeEntity, "interval": TimeIntervalEntity},
"duration": {"value": DurationEntity},
"amount-of-money": {"value": CurrencyEntity},
}
54 changes: 54 additions & 0 deletions dialogy/types/entity/currency_entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
.. _currency_entity:
This module provides the entity type for amounts of money: a numerical value together with its currency unit.

Import classes:
- CurrencyEntity
"""
from typing import Any, Dict

import attr

from dialogy import constants as const
from dialogy.types.entity.numerical_entity import NumericalEntity


@attr.s
class CurrencyEntity(NumericalEntity):
    """
    Currency Entity Type

    Use this type for entities on the `amount-of-money` dimension: a numerical
    amount paired with the currency unit it is expressed in.

    Attributes:
    - `unit` the currency unit of the amount; keyword-only and validated to be a `str`.
    """

    unit = attr.ib(
        kw_only=True, type=str, validator=attr.validators.instance_of(str)
    )

    @classmethod
    def reshape(cls, dict_: Dict[str, Any]) -> Dict[str, Any]:
        """
        Reshape a raw entity dict and lift the currency unit to the top level.

        :param dict_: Raw entity dict; its `value` entry is expected to be a mapping
            that may carry a `unit` key.
        :type dict_: Dict[str, Any]
        :return: The dict produced by the parent `reshape`, with the unit stored
            under the top-level `unit` key (`None` if the nested value had no unit).
        :rtype: Dict[str, Any]
        """
        # The unit is captured before delegating to the parent class.
        # NOTE(review): presumably the parent reshape may restructure the nested
        # value -- confirm against NumericalEntity.reshape before reordering.
        currency_unit = dict_[const.EntityKeys.VALUE].get(const.EntityKeys.UNIT)
        reshaped = super().reshape(dict_)
        reshaped[const.EntityKeys.UNIT] = currency_unit
        return reshaped

    def get_value(self, reference: Any = None) -> Any:
        """
        Getter for CurrencyEntity.

        The amount is obtained from the parent class and rendered as a string:
        the unit immediately followed by the amount with two decimal places.

        NOTE: the output format is not settled yet; retaining a structured form
        with `value` and `unit` keys has been considered as an alternative.

        :param reference: Forwarded unchanged to the parent `get_value`, defaults to None
        :type reference: Any, optional
        :return: The unit concatenated with the amount formatted to two decimals.
        :rtype: Any
        """
        amount = super().get_value(reference=reference)
        return f"{self.unit}{amount:.2f}"
21 changes: 11 additions & 10 deletions dialogy/types/entity/time_interval_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,15 @@ class TimeIntervalEntity(TimeEntity):
@classmethod
def reshape(cls, dict_: Dict[str, Any]) -> Dict[str, Any]:
dict_ = super(TimeIntervalEntity, cls).reshape(dict_)
if all(
value[const.EntityKeys.TYPE] == const.EntityKeys.INTERVAL
for value in dict_[const.EntityKeys.VALUES]
):
date_range = dict_[const.EntityKeys.VALUES][0].get(
const.EntityKeys.FROM
) or dict_[const.EntityKeys.VALUES][0].get(const.EntityKeys.TO)
if not date_range:
raise TypeError(f"{dict_} does not match TimeIntervalEntity format")
dict_[const.EntityKeys.GRAIN] = date_range[const.EntityKeys.GRAIN]
for value in dict_[const.EntityKeys.VALUES]:
if value[const.EntityKeys.TYPE] == const.EntityKeys.INTERVAL:
date_range = dict_[const.EntityKeys.VALUES][0].get(
const.EntityKeys.FROM
) or dict_[const.EntityKeys.VALUES][0].get(const.EntityKeys.TO)
if not date_range:
raise TypeError(f"{dict_} does not match TimeIntervalEntity format")
dict_[const.EntityKeys.GRAIN] = date_range[const.EntityKeys.GRAIN]
break
return dict_

def get_value(self, reference: Any = None) -> Any:
Expand Down Expand Up @@ -76,6 +75,8 @@ def get_value(self, reference: Any = None) -> Any:

if date_dict:
return datetime.fromisoformat(date_dict.get(const.EntityKeys.VALUE))
elif reference.get(const.EntityKeys.VALUE):
return datetime.fromisoformat(reference.get(const.EntityKeys.VALUE))
else:
raise KeyError(
f"Expected at least 1 of `from` or `to` in {self.values} for {self}"
Expand Down
22 changes: 12 additions & 10 deletions docs/_modules/dialogy/base/entity_extractor.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../../genindex.html" /><link rel="search" title="Search" href="../../../search.html" />

<meta name="generator" content="sphinx-3.5.4, furo 2021.04.11.beta34"/>
<title>dialogy.base.entity_extractor - dialogy 0.6.0 documentation</title>
<title>dialogy.base.entity_extractor - dialogy 0.7.0 documentation</title>
<link rel="stylesheet" href="../../../_static/styles/furo.css?digest=59ab60ac09ea94ccfe6deddff6d715cce948a6fc">
<link rel="stylesheet" href="../../../_static/pygments.css">
<link media="(prefers-color-scheme: dark)" rel="stylesheet" href="../../../_static/pygments_dark.css">
Expand Down Expand Up @@ -87,7 +87,7 @@
</label>
</div>
<div class="header-center">
<a href="../../../index.html"><div class="brand">dialogy 0.6.0 documentation</div></a>
<a href="../../../index.html"><div class="brand">dialogy 0.7.0 documentation</div></a>
</div>
<div class="header-right">
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
Expand All @@ -101,7 +101,7 @@
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../../index.html">


<span class="sidebar-brand-text">dialogy 0.6.0 documentation</span>
<span class="sidebar-brand-text">dialogy 0.7.0 documentation</span>

</a><form class="sidebar-search-container" method="get" action="../../../search.html">
<input class="sidebar-search" placeholder=Search name="q">
Expand Down Expand Up @@ -260,18 +260,17 @@ <h1>Source code for dialogy.base.entity_extractor</h1><div class="highlight"><pr

<div class="viewcode-block" id="EntityExtractor.apply_filters"><a class="viewcode-back" href="../../../source/dialogy.base.html#dialogy.base.entity_extractor.EntityExtractor.apply_filters">[docs]</a> <span class="k">def</span> <span class="nf">apply_filters</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">entities</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BaseEntity</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">BaseEntity</span><span class="p">]:</span>
<span class="sd">"""</span>
<span class="sd"> [summary]</span>
<span class="sd"> Conditionally remove entities.</span>

<span class="sd"> :param entities: [description]</span>
<span class="sd"> :param entities: A list of entities.</span>
<span class="sd"> :type entities: List[BaseEntity]</span>
<span class="sd"> :return: [description]</span>
<span class="sd"> :return: A list of entities. This can be at most the same length as `entities`.</span>
<span class="sd"> :rtype: List[BaseEntity]</span>
<span class="sd"> """</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">remove_low_scoring_entities</span><span class="p">(</span><span class="n">entities</span><span class="p">)</span></div>

<div class="viewcode-block" id="EntityExtractor.entity_consensus"><a class="viewcode-back" href="../../../source/dialogy.base.html#dialogy.base.entity_extractor.EntityExtractor.entity_consensus">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">entity_consensus</span><span class="p">(</span>
<span class="n">entities</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BaseEntity</span><span class="p">],</span> <span class="n">input_size</span><span class="p">:</span> <span class="nb">int</span>
<div class="viewcode-block" id="EntityExtractor.entity_consensus"><a class="viewcode-back" href="../../../source/dialogy.base.html#dialogy.base.entity_extractor.EntityExtractor.entity_consensus">[docs]</a> <span class="k">def</span> <span class="nf">entity_consensus</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">entities</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">BaseEntity</span><span class="p">],</span> <span class="n">input_size</span><span class="p">:</span> <span class="nb">int</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">BaseEntity</span><span class="p">]:</span>
<span class="sd">"""</span>
<span class="sd"> Combine entities by type and value.</span>
Expand All @@ -289,7 +288,10 @@ <h1>Source code for dialogy.base.entity_extractor</h1><div class="highlight"><pr
<span class="n">entity_type_value_group</span> <span class="o">=</span> <span class="n">py_</span><span class="o">.</span><span class="n">group_by</span><span class="p">(</span>
<span class="n">entities</span><span class="p">,</span> <span class="k">lambda</span> <span class="n">entity</span><span class="p">:</span> <span class="p">(</span><span class="n">entity</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">entity</span><span class="o">.</span><span class="n">get_value</span><span class="p">())</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">EntityExtractor</span><span class="o">.</span><span class="n">aggregate_entities</span><span class="p">(</span><span class="n">entity_type_value_group</span><span class="p">,</span> <span class="n">input_size</span><span class="p">)</span></div></div>
<span class="n">aggregate_entities</span> <span class="o">=</span> <span class="n">EntityExtractor</span><span class="o">.</span><span class="n">aggregate_entities</span><span class="p">(</span>
<span class="n">entity_type_value_group</span><span class="p">,</span> <span class="n">input_size</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">apply_filters</span><span class="p">(</span><span class="n">aggregate_entities</span><span class="p">)</span></div></div>
</pre></div>
</article>
<footer>
Expand Down
6 changes: 3 additions & 3 deletions docs/_modules/dialogy/base/plugin.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../../genindex.html" /><link rel="search" title="Search" href="../../../search.html" />

<meta name="generator" content="sphinx-3.5.4, furo 2021.04.11.beta34"/>
<title>dialogy.base.plugin - dialogy 0.6.0 documentation</title>
<title>dialogy.base.plugin - dialogy 0.7.0 documentation</title>
<link rel="stylesheet" href="../../../_static/styles/furo.css?digest=59ab60ac09ea94ccfe6deddff6d715cce948a6fc">
<link rel="stylesheet" href="../../../_static/pygments.css">
<link media="(prefers-color-scheme: dark)" rel="stylesheet" href="../../../_static/pygments_dark.css">
Expand Down Expand Up @@ -87,7 +87,7 @@
</label>
</div>
<div class="header-center">
<a href="../../../index.html"><div class="brand">dialogy 0.6.0 documentation</div></a>
<a href="../../../index.html"><div class="brand">dialogy 0.7.0 documentation</div></a>
</div>
<div class="header-right">
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
Expand All @@ -101,7 +101,7 @@
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../../index.html">


<span class="sidebar-brand-text">dialogy 0.6.0 documentation</span>
<span class="sidebar-brand-text">dialogy 0.7.0 documentation</span>

</a><form class="sidebar-search-container" method="get" action="../../../search.html">
<input class="sidebar-search" placeholder=Search name="q">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../../../../../genindex.html" /><link rel="search" title="Search" href="../../../../../../search.html" />

<meta name="generator" content="sphinx-3.5.4, furo 2021.04.11.beta34"/>
<title>dialogy.plugins.postprocess.text.slot_filler.rule_slot_filler - dialogy 0.6.0 documentation</title>
<title>dialogy.plugins.postprocess.text.slot_filler.rule_slot_filler - dialogy 0.7.0 documentation</title>
<link rel="stylesheet" href="../../../../../../_static/styles/furo.css?digest=59ab60ac09ea94ccfe6deddff6d715cce948a6fc">
<link rel="stylesheet" href="../../../../../../_static/pygments.css">
<link media="(prefers-color-scheme: dark)" rel="stylesheet" href="../../../../../../_static/pygments_dark.css">
Expand Down Expand Up @@ -87,7 +87,7 @@
</label>
</div>
<div class="header-center">
<a href="../../../../../../index.html"><div class="brand">dialogy 0.6.0 documentation</div></a>
<a href="../../../../../../index.html"><div class="brand">dialogy 0.7.0 documentation</div></a>
</div>
<div class="header-right">
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
Expand All @@ -101,7 +101,7 @@
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../../../../../index.html">


<span class="sidebar-brand-text">dialogy 0.6.0 documentation</span>
<span class="sidebar-brand-text">dialogy 0.7.0 documentation</span>

</a><form class="sidebar-search-container" method="get" action="../../../../../../search.html">
<input class="sidebar-search" placeholder=Search name="q">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../../../../../genindex.html" /><link rel="search" title="Search" href="../../../../../../search.html" />

<meta name="generator" content="sphinx-3.5.4, furo 2021.04.11.beta34"/>
<title>dialogy.plugins.postprocess.text.voting.intent_voting - dialogy 0.6.0 documentation</title>
<title>dialogy.plugins.postprocess.text.voting.intent_voting - dialogy 0.7.0 documentation</title>
<link rel="stylesheet" href="../../../../../../_static/styles/furo.css?digest=59ab60ac09ea94ccfe6deddff6d715cce948a6fc">
<link rel="stylesheet" href="../../../../../../_static/pygments.css">
<link media="(prefers-color-scheme: dark)" rel="stylesheet" href="../../../../../../_static/pygments_dark.css">
Expand Down Expand Up @@ -87,7 +87,7 @@
</label>
</div>
<div class="header-center">
<a href="../../../../../../index.html"><div class="brand">dialogy 0.6.0 documentation</div></a>
<a href="../../../../../../index.html"><div class="brand">dialogy 0.7.0 documentation</div></a>
</div>
<div class="header-right">
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
Expand All @@ -101,7 +101,7 @@
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../../../../../index.html">


<span class="sidebar-brand-text">dialogy 0.6.0 documentation</span>
<span class="sidebar-brand-text">dialogy 0.7.0 documentation</span>

</a><form class="sidebar-search-container" method="get" action="../../../../../../search.html">
<input class="sidebar-search" placeholder=Search name="q">
Expand Down
Loading