Rename data.transforms -> data.encoders
Summary: Pull Request resolved: fairinternal/fairseq-py#747

Differential Revision: D16403464

Pulled By: myleott

fbshipit-source-id: ee3b4184f129a02be833c7bdc00685978b4de883
myleott authored and facebook-github-bot committed Jul 21, 2019
1 parent 69d0f7f commit f812e52
Showing 9 changed files with 14 additions and 14 deletions.
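For downstream code, the rename only changes the import path and module name; below is a minimal sketch of the before/after, where the Namespace fields are illustrative placeholders rather than options taken from this commit.

from argparse import Namespace

from fairseq.data import encoders  # was: from fairseq.data import transforms

# 'space' is registered below and needs no extra options; other tokenizer/BPE
# choices may require additional fields on args.
args = Namespace(tokenizer='space', bpe=None)

tokenizer = encoders.build_tokenizer(args)  # was: transforms.build_tokenizer(args)
bpe = encoders.build_bpe(args)              # assumed to return None when no BPE is selected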
@@ -24,8 +24,8 @@
 )


-# automatically import any Python files in the transforms/ directory
+# automatically import any Python files in the encoders/ directory
 for file in os.listdir(os.path.dirname(__file__)):
     if file.endswith('.py') and not file.startswith('_'):
         module = file[:file.find('.py')]
-        importlib.import_module('fairseq.data.transforms.' + module)
+        importlib.import_module('fairseq.data.encoders.' + module)
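Because of the loop above, any Python file placed in the encoders/ package is imported automatically, so a class only needs the register_tokenizer (or register_bpe) decorator to become selectable. A hedged sketch of such a plugin follows; the name is hypothetical and the constructor plus encode/decode interface are assumed from the tokenizers changed below.

from fairseq.data.encoders import register_tokenizer


@register_tokenizer('lowercase_space')  # hypothetical plugin, not part of this commit
class LowercaseSpaceTokenizer(object):

    def __init__(self, args):
        self.args = args

    def encode(self, x):
        # lowercase and collapse runs of whitespace
        return ' '.join(x.lower().split())

    def decode(self, x):
        # nothing to undo for this toy tokenizer
        return x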
@@ -6,7 +6,7 @@
 # can be found in the PATENTS file in the same directory.

 from fairseq import file_utils
-from fairseq.data.transforms import register_bpe
+from fairseq.data.encoders import register_bpe


 @register_bpe('gpt2')
@@ -5,7 +5,7 @@
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.

-from fairseq.data.transforms import register_tokenizer
+from fairseq.data.encoders import register_tokenizer


 @register_tokenizer('moses')
@@ -5,7 +5,7 @@
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.

-from fairseq.data.transforms import register_tokenizer
+from fairseq.data.encoders import register_tokenizer


 @register_tokenizer('nltk')
@@ -6,7 +6,7 @@
 # can be found in the PATENTS file in the same directory.

 from fairseq import file_utils
-from fairseq.data.transforms import register_bpe
+from fairseq.data.encoders import register_bpe


 @register_bpe('sentencepiece')
@@ -7,7 +7,7 @@

 import re

-from fairseq.data.transforms import register_tokenizer
+from fairseq.data.encoders import register_tokenizer


 @register_tokenizer('space')
@@ -6,7 +6,7 @@
 # can be found in the PATENTS file in the same directory.

 from fairseq import file_utils
-from fairseq.data.transforms import register_bpe
+from fairseq.data.encoders import register_bpe


 @register_bpe('subword_nmt')
6 changes: 3 additions & 3 deletions fairseq/hub_utils.py
@@ -9,7 +9,7 @@
 import torch

 from fairseq import utils
-from fairseq.data import transforms
+from fairseq.data import encoders


 class Generator(object):
@@ -44,8 +44,8 @@ def __init__(self, args, task, models):
         # (None if no unknown word replacement, empty if no path to align dictionary)
         self.align_dict = utils.load_align_dict(getattr(args, 'replace_unk', None))

-        self.tokenizer = transforms.build_tokenizer(args)
-        self.bpe = transforms.build_bpe(args)
+        self.tokenizer = encoders.build_tokenizer(args)
+        self.bpe = encoders.build_bpe(args)

     def generate(self, src_str, verbose=False):

6 changes: 3 additions & 3 deletions interactive.py
@@ -15,7 +15,7 @@
 import torch

 from fairseq import checkpoint_utils, options, tasks, utils
-from fairseq.data import transforms
+from fairseq.data import encoders


 Batch = namedtuple('Batch', 'ids src_tokens src_lengths')
@@ -103,8 +103,8 @@ def main(args):
     generator = task.build_generator(args)

     # Handle tokenization and BPE
-    tokenizer = transforms.build_tokenizer(args)
-    bpe = transforms.build_bpe(args)
+    tokenizer = encoders.build_tokenizer(args)
+    bpe = encoders.build_bpe(args)

     def encode_fn(x):
         if tokenizer is not None:
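The truncated encode_fn above chains the two encoders returned by encoders.build_tokenizer and encoders.build_bpe. Below is a hedged sketch of that pattern and its inverse; the helper name and the decode ordering are assumptions, not code from this file.

def make_encode_decode(tokenizer, bpe):
    """Wrap the objects returned by encoders.build_tokenizer(args) and
    encoders.build_bpe(args) into encode/decode closures."""

    def encode_fn(x):
        # tokenize first, then apply BPE
        if tokenizer is not None:
            x = tokenizer.encode(x)
        if bpe is not None:
            x = bpe.encode(x)
        return x

    def decode_fn(x):
        # invert in the opposite order: undo BPE, then detokenize
        if bpe is not None:
            x = bpe.decode(x)
        if tokenizer is not None:
            x = tokenizer.decode(x)
        return x

    return encode_fn, decode_fn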
