Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' into better_finetune
Browse files Browse the repository at this point in the history
  • Loading branch information
tchaton authored Feb 1, 2021
2 parents 7ccfd81 + bd3d50d commit 54bb69a
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 7 deletions.
5 changes: 3 additions & 2 deletions flash/core/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Any, Union

import torch
from transformers.modeling_outputs import SequenceClassifierOutput

from flash.core.data import TaskDataPipeline
from flash.core.model import Task


class ClassificationDataPipeline(TaskDataPipeline):

def before_uncollate(self, batch: torch.Tensor) -> torch.Tensor:
def before_uncollate(self, batch: Union[torch.Tensor, tuple]) -> torch.Tensor:
if isinstance(batch, tuple):
batch = batch[0]
return torch.softmax(batch, -1)
Expand Down
10 changes: 8 additions & 2 deletions flash/text/classification/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from typing import Any, Callable, Optional
from typing import Any, Callable, Optional, Union

import torch
from datasets import load_dataset
from datasets.utils.download_manager import GenerateMode
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from torch import Tensor
from transformers import AutoTokenizer, default_data_collator
from transformers.modeling_outputs import SequenceClassifierOutput

from flash.core.classification import ClassificationDataPipeline
from flash.core.data import DataModule
Expand Down Expand Up @@ -48,7 +49,6 @@ def prepare_dataset(
label_to_class_mapping=None,
predict=False,
):

data_files = {}

if train_file is not None:
Expand Down Expand Up @@ -141,6 +141,12 @@ def after_collate(self, batch: Tensor) -> Tensor:
batch["input_ids"] = batch["input_ids"].squeeze(0)
return batch

def before_uncollate(
    self, batch: Union[torch.Tensor, tuple, SequenceClassifierOutput]
) -> Union[tuple, torch.Tensor]:
    """Unwrap a model output to its logits, then defer to the parent pipeline.

    Args:
        batch: Either a ``SequenceClassifierOutput`` or any value the parent
            ``before_uncollate`` accepts (a tensor or a tuple, per the
            annotation).

    Returns:
        Whatever the parent class's ``before_uncollate`` returns for the
        (possibly unwrapped) batch.
    """
    # Only a SequenceClassifierOutput is unwrapped; anything else is forwarded untouched.
    unwrapped = batch.logits if isinstance(batch, SequenceClassifierOutput) else batch
    return super().before_uncollate(unwrapped)


class TextClassificationData(DataModule):
"""Data module for text classification tasks."""
Expand Down
3 changes: 2 additions & 1 deletion flash/text/classification/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def forward(self, batch_dict):

def step(self, batch, batch_idx) -> dict:
output = {}
loss, logits = self.forward(batch)
out = self.forward(batch)
loss, logits = out[:2]
output["loss"] = loss
output["y_hat"] = logits
probs = self.data_pipeline.before_uncollate(logits)
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ torch==1.7.1
PyYAML==5.3.1
Pillow>=7.2
torchvision==0.8.2
transformers==3.1.0
datasets==1.0.1
transformers==4.2.2
datasets==1.2.1
pandas==1.1.2
scikit-learn==0.24.0
numpy
Expand Down

0 comments on commit 54bb69a

Please sign in to comment.