-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_samples.py
37 lines (30 loc) · 1.49 KB
/
generate_samples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import math
import random
from utils import *
# Radius of the ball around each cluster centre; kept as a float so that
# arithmetic involving it is carried out in floating point.
RADIUS = float(10 ** 6)
def generate_centroid(centroids, num_clusters):
    """Draw a random cluster centre that differs from those already chosen.

    Every coordinate of a candidate is ``2 * RADIUS`` times a random integer
    drawn from ``[0, NUM_CLUSTERS)``, so candidates lie on a lattice whose
    spacing is ``2 * RADIUS``.  Candidates are redrawn until one is found
    that is not equal to any of the first ``num_clusters`` entries of
    ``centroids``.

    Args:
        centroids: Indexable collection of centroids.  Only the entries at
            indices ``0 .. num_clusters - 1`` are read, so later entries may
            be uninitialised.
        num_clusters: Number of leading entries of ``centroids`` that are
            already in use.

    Returns:
        A NumPy array with ``NUM_DIMENSIONS`` coordinates.

    Note:
        The search only terminates once a free lattice point is drawn; the
        caller must make sure that at least one lattice point is not yet
        present among the first ``num_clusters`` centroids.
    """
    # Retry in a loop instead of recursing: each collision used to add a
    # stack frame, so a nearly full lattice (e.g. a single dimension with
    # many clusters) could raise RecursionError although a free point exists.
    while True:
        candidate = 2 * RADIUS * np.random.randint(0, NUM_CLUSTERS,
                                                   NUM_DIMENSIONS)
        collides = any(
            np.array_equal(candidate, centroids[cluster_id])
            for cluster_id in range(num_clusters)
        )
        if not collides:
            return candidate
def generate_sample(pseudo_centroid):
    """Draw one random point from the axis-aligned cube around a centroid.

    The cube has half side ``int(RADIUS / sqrt(NUM_DIMENSIONS))``, so none of
    its corners is farther than ``RADIUS`` from the centre.  Each coordinate
    receives an independent random integer offset from the cube's lower
    corner.

    Args:
        pseudo_centroid: Centre of the cube, with ``NUM_DIMENSIONS``
            coordinates.

    Returns:
        The sampled point, ``pseudo_centroid`` shifted by the random offsets.
    """
    cube_side = int(RADIUS / math.sqrt(NUM_DIMENSIONS)) * 2
    # Start from the corner with the smallest coordinates, then add a
    # non-negative integer offset in [0, cube_side) along every axis.
    lower_corner = pseudo_centroid - (cube_side / 2)
    offsets = np.random.randint(0, cube_side, NUM_DIMENSIONS)
    return lower_corner + offsets
def generate_samples():
    """Generate a random, labelled data set of clustered points.

    First ``NUM_CLUSTERS`` distinct centroids are drawn with
    ``generate_centroid``.  Then ``NUM_SAMPLES`` points are produced; each one
    picks a cluster uniformly at random and is drawn around that cluster's
    centroid with ``generate_sample``.

    Returns:
        A tuple ``(samples, expected_clustering)``:

        * ``samples`` -- ``float64`` array of shape
          ``(NUM_SAMPLES, NUM_DIMENSIONS)``.
        * ``expected_clustering`` -- integer array of length ``NUM_SAMPLES``
          holding, for every sample, the index of the cluster it was drawn
          from.

    Raises:
        AssertionError: If a generated sample lies farther than ``RADIUS``
            from the centroid of its cluster.
    """
    pseudo_centroids = np.empty((NUM_CLUSTERS, NUM_DIMENSIONS),
                                dtype=np.float64)
    for cluster_id in range(NUM_CLUSTERS):
        # Only rows [0, cluster_id) have been filled in so far, hence
        # num_clusters=cluster_id restricts the uniqueness check to them.
        pseudo_centroids[cluster_id] = generate_centroid(
            pseudo_centroids, num_clusters=cluster_id)

    samples = np.empty((NUM_SAMPLES, NUM_DIMENSIONS), dtype=np.float64)
    expected_clustering = np.empty(NUM_SAMPLES, dtype=int)
    for sample_id in range(NUM_SAMPLES):
        cluster_id = random.randint(0, NUM_CLUSTERS - 1)
        expected_clustering[sample_id] = cluster_id
        samples[sample_id] = generate_sample(pseudo_centroids[cluster_id])
        # Sanity check: the sample must stay inside its cluster's ball.  The
        # comparison has to be applied to the distance itself; previously the
        # closing parenthesis was misplaced, so a boolean array was passed as
        # the second point and the radius was never actually checked.
        # NOTE(review): assumes euclidean_distance returns the scalar
        # distance between two points -- confirm against its definition.
        assert euclidean_distance(pseudo_centroids[cluster_id],
                                  samples[sample_id]) <= RADIUS
    return samples, expected_clustering