Skip to content

Commit

Permalink
refactor: restore example
Browse files Browse the repository at this point in the history
  • Loading branch information
zanussbaum committed Jul 12, 2024
1 parent a4cf8ba commit cb3b8d6
Showing 1 changed file with 7 additions and 16 deletions.
23 changes: 7 additions & 16 deletions examples/map_text.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-from nomic import atlas, AtlasDataset
+from nomic import atlas
import numpy as np
from datasets import load_dataset

@@ -7,21 +7,12 @@
max_documents = 100000
subset_idxs = np.random.choice(len(dataset), size=max_documents, replace=False).tolist()
documents = [dataset[i] for i in subset_idxs]
-documents = [{'id': i, 'text': doc['text']} for i, doc in enumerate(documents)]

-dataset = AtlasDataset(identifier="nomic/test-ag-news", unique_id_field='id')

-for start in range(0, len(documents), 10000):
-    dataset.add_data(documents[start:start+10000])

-dataset.create_index(indexed_field='text')


-# project = atlas.map_data(data=documents,
-# indexed_field='text',
-# identifier='News 100k Example',
-# description='News 100k Example from the ag_news dataset hosted on huggingface.'
-# )
-# print(project.maps)
+project = atlas.map_data(data=documents,
+indexed_field='text',
+identifier='News 100k Example',
+description='News 100k Example from the ag_news dataset hosted on huggingface.'
+)
+print(project.maps)


0 comments on commit cb3b8d6

Please sign in to comment.