Skip to content

Commit

Permalink
Compute column feat3 of the fake data in a better way and improve doc…
Browse files Browse the repository at this point in the history
…umentation
  • Loading branch information
abelsiqueira committed Feb 15, 2023
1 parent 334db22 commit 18dfc99
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 17 deletions.
40 changes: 29 additions & 11 deletions bird_cloud_gnn/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,27 @@ def generate_data(
):
"""Generate fake point cloud radar data
Ranges are generated from exponential decay, azimuth are
The fake data contains polar coordinates `range`, `azimuth`, and `elevation`; cartesian
coordinates `x`, `y`, `z`; random numerical features `useless_feature`, `feat1`, `feat2`,
and `feat3`; and the target `class` with values 0 or 1.
- `range` is generated like an exponential decay.
- `azimuth` is generated uniformly in the interval [0.5, 365.5).
- `elevation` is taken from an input array.
- `x`, `y`, and `z` are converted from these polar values.
- `useless_feature` is taken from a Normal(0, 1) distribution.
- `feat1`, `feat2`, and `feat3` are randomly constructed to remain in the interval [0, 1].
The columns are `range`, `azimuth` and `elevation` for the polar coordinates,
`x`, `y`, and `z` for the cartesian coordinates, `useless_feature`, `feat1`, `feat2`,
`feat3` for the numerical features, and `class` for the target.
Args:
filename (str, optional): Filename to save the data. Use None to ignore.
num_points (int, optional): Number of points. Defaults to 2**13.
max_range (float, optional): Maximum generated range. Defaults to 300_000.0.
azimuth_skip (float, optional): azimuth is generated from 0.5 to 355.5 with the given
skip. Defaults to 2.0.
azimuth_skip (float, optional): Size between azimuth values. Defaults to 2.0.
elevations (array of floats, optional): List of elevations. Defaults to
np.array([0.3, 0.8, 1.2, 2, 2.8, 4.5, 6, 8, 10, 12, 15, 20, 25]).
add_na (bool, optional): Whether to add missing data. Defaults to False.
Expand All @@ -39,9 +52,7 @@ def generate_data(
Returns:
pandas.DataFrame: Generated data. It was also saved to `filename` if that argument was
passed. The columns are `range`, `azimuth` and `elevation` for the polar coordinates,
`x`, `y`, and `z` for the cartesian coordinates, `useless_feature`, `feat1`, `feat2`,
`feat3` for the numerical features, and `class` for the target.
passed.
"""

point_cloud = pd.DataFrame({
Expand Down Expand Up @@ -75,23 +86,30 @@ def generate_data(
def sigmoid(value):
    """Evaluate the logistic function 1 / (1 + exp(-value)).

    Args:
        value: Scalar or array-like accepted by `np.exp`; evaluated elementwise.

    Returns:
        The logistic function of `value`, with values between 0 and 1.
    """
    denominator = 1 + np.exp(-value)
    return 1 / denominator

def to01(value):
    """Linearly rescale `value` so its minimum maps to 0 and its maximum to 1.

    Args:
        value: Array-like of numbers (e.g. a NumPy array or pandas Series).

    Returns:
        `(value - min) / (max - min)`, elementwise. When every entry is equal
        the span is zero, so zeros are returned instead of the NaN that 0 / 0
        would produce.
    """
    lowest = np.min(value)
    span = np.max(value) - lowest
    if span == 0:
        # Constant input: the numerator is all zeros, so any non-zero divisor
        # gives the well-defined result 0 and avoids a 0 / 0 NaN.
        span = 1
    return (value - lowest) / span

point_cloud['feat1'] = sigmoid(
(point_cloud.x + point_cloud.y - point_cloud.z) / max_range)
point_cloud['feat2'] = sigmoid(
(point_cloud.z - point_cloud.z.mean()) / point_cloud.z.std())
point_cloud['feat3'] = (np.cos(
np.exp(-0.3 * (point_cloud.x - 1)**2 - 0.2 *
(point_cloud.y + 0.3)**2)) + 1) / 2
point_cloud['feat3'] = (1 + np.cos(
np.exp(
-1 + 2 * to01(
-0.3 * (point_cloud.x - 1)**2 - 0.2 * (point_cloud.y + 0.3)**2
)
))
) / 2
hidden1 = (point_cloud.feat1 + point_cloud.feat2**2) / point_cloud.feat3
hidden1 = (hidden1 - np.mean(hidden1)) / np.std(hidden1)
hidden2 = np.log(1 + point_cloud.feat1**2) - np.sin(
4 * np.pi * point_cloud.feat2)
hidden2 = (hidden2 - np.mean(hidden2)) / np.std(hidden2)

point_cloud['neighbours'] = point_cloud.apply(lambda row: len(
neighbours = point_cloud.apply(lambda row: len(
tree.query_ball_point(row[['x', 'y', 'z']], radius_influence)),
axis=1)
aux = (point_cloud['neighbours'] > 5).astype('int32')
aux = (neighbours > 5).astype('int32')
point_cloud['class'] = np.round(
sigmoid(0.5 * hidden1 + 0.2 * hidden2 + 3 * aux) +
np.random.randn(num_points) * 0.1)
Expand Down
11 changes: 5 additions & 6 deletions tests/test_fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,20 @@ def test_generate_data(tmp_path):
azimuth_skip=90.0,
elevations=[0, 30, 60],
)
assert df.shape == (2**4, 12)
assert df.shape == (2**4, 11)
assert df.range.min() >= 0.1
assert df.range.max() <= 2.3
assert sorted(df.azimuth.unique()) == [0.5, 90.5, 180.5, 270.5]
assert sorted(df.elevation.unique()) == [0, 30, 60]
for feat in ['feat1', 'feat2', 'feat3']:
assert df[feat].min() >= 0
assert df[feat].max() <= 1
assert (df.columns == [
'range', 'azimuth', 'elevation', 'useless_feature', 'x', 'y', 'z',
'feat1', 'feat2', 'feat3', 'neighbours', 'class'
'feat1', 'feat2', 'feat3', 'class'
]).all()
assert df.notna().all(axis=None)
df2 = pd.read_csv(filename)
for col in df.columns:
if not df2[col].eq(df[col]).all():
print(col)
print(df[col] - df2[col])

assert (np.abs(df2 - df) < 1e-12).all(
axis=None) # Round off error might occur saving the file!
Expand Down

0 comments on commit 18dfc99

Please sign in to comment.