Skip to content

Commit

Permalink
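change variable name (renames the SftMMapIndexedDataset and SftMMapIndexedDatasetBuilder classes to SFTMMapIndexedDataset and SFTMMapIndexedDatasetBuilder)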
Browse files Browse the repository at this point in the history
  • Loading branch information
greycooker committed Aug 2, 2024
1 parent e988cf5 commit ecb62b6
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions paddlenlp/data/indexed_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def make_sft_dataset(path, dataclass, skip_warmup=False, impl="mmap"):

print_rank_0(" > building dataset index ...")
start_time = time.time()
- sft_indexed_dataset = SftMMapIndexedDataset(path, dataclass, skip_warmup)
+ sft_indexed_dataset = SFTMMapIndexedDataset(path, dataclass, skip_warmup)
print_rank_0(" > finished creating SFT indexed dataset in {:4f} " "seconds".format(time.time() - start_time))
print_rank_0(" number of samples: {}".format(len(sft_indexed_dataset.doc_idx) - 1))

Expand Down Expand Up @@ -574,7 +574,7 @@ def exists(path):
return os.path.exists(index_file_path(path)) and os.path.exists(data_file_path(path))


- class SftMMapIndexedDataset(paddle.io.Dataset):
+ class SFTMMapIndexedDataset(paddle.io.Dataset):
class Index(object):
_HDR_MAGIC = b"MMIDIDX\x00\x00"

Expand Down Expand Up @@ -798,7 +798,7 @@ def make_builder(out_file, impl, save_dtype, loss_mask_file=None):
return IndexedDatasetBuilder(out_file, dtype=save_dtype)


- class SftMMapIndexedDatasetBuilder(object):
+ class SFTMMapIndexedDatasetBuilder(object):
def __init__(self, output_file_dict, dtype):
self._data_file_dict = {}
for key, filename in output_file_dict.items():
Expand All @@ -823,7 +823,7 @@ def end_document(self):
def finalize(self, index_file):
for key, filename in self._data_file_dict.items():
filename.close()
- with SftMMapIndexedDataset.Index.writer(index_file, self._dtype) as index:
+ with SFTMMapIndexedDataset.Index.writer(index_file, self._dtype) as index:
index.write(self._sizes, self._doc_idx)


Expand Down

0 comments on commit ecb62b6

Please sign in to comment.