Compute column feat3 of the fake data in a better way and improve documentation
point-cloud-radar · Feb 15, 2023 · 18dfc99 · 18dfc99
1 parent 334db22
commit 18dfc99
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 17 deletions.
diff --git a/bird_cloud_gnn/fake.py b/bird_cloud_gnn/fake.py
@@ -21,14 +21,27 @@ def generate_data(
 ):
     """Generate fake point cloud radar data
 
-    Ranges are generated from exponential decay, azimuth are
+    The fake data contains polar coordinates `range`, `azimuth`, and `elevation`; cartesian
+    coordinates `x`, `y`, `z`; random numerical features `useless_feature`, `feat1`, `feat2`,
+    and `feat3`; and the target `class` with values 0 or 1.
+
+    - `range` follows an exponential decay.
+    - `azimuth` is generated uniformly in the interval [0.5, 360.5).
+    - `elevation` is taken from an input array.
+    - `x`, `y`, and `z` are converted from these polar values.
+    - `useless_feature` is taken from a Normal(0, 1) distribution.
+    - `feat1`, `feat2`, and `feat3` are randomly constructed to remain in the interval [0, 1].
+
+
+    The columns are `range`, `azimuth` and `elevation` for the polar coordinates,
+    `x`, `y`, and `z` for the cartesian coordinates, `useless_feature`, `feat1`, `feat2`,
+    `feat3` for the numerical features, and `class` for the target.
 
     Args:
         filename (str, optional): Filename to save the data. Use None to ignore.
         num_points (int, optional): Number of points. Defaults to 2**13.
         max_range (float, optional): Maximum generated range. Defaults to 300_000.0.
-        azimuth_skip (float, optional): azimuth is generated from 0.5 to 355.5 with the given
-            skip. Defaults to 2.0.
+        azimuth_skip (float, optional): Size between azimuth values. Defaults to 2.0.
         elevations (array of floats, optional): List of elevations. Defaults to
             np.array([0.3, 0.8, 1.2, 2, 2.8, 4.5, 6, 8, 10, 12, 15, 20, 25]).
         add_na (bool, optional): Whether to add missing data. Defaults to False.
@@ -39,9 +52,7 @@ def generate_data(
 
     Returns:
         pandas.DataFrame: Generated data. It was also saved to `filename` if that argument was
-            passed. The columns are `range`, `azimuth` and `elevation` for the polar coordinates,
-            `x`, `y`, and `z` for the cartesian coordinates, `useless_feature`, `feat1`, `feat2`,
-            `feat3` for the numerical features, and `class` for the target.
+            passed.
     """
 
     point_cloud = pd.DataFrame({
@@ -75,23 +86,30 @@ def generate_data(
     def sigmoid(value):
         return 1 / (1 + np.exp(-value))
 
+    def to01(value):
+        return (value - np.min(value)) / (np.max(value) - np.min(value))
+
     point_cloud['feat1'] = sigmoid(
         (point_cloud.x + point_cloud.y - point_cloud.z) / max_range)
     point_cloud['feat2'] = sigmoid(
         (point_cloud.z - point_cloud.z.mean()) / point_cloud.z.std())
-    point_cloud['feat3'] = (np.cos(
-        np.exp(-0.3 * (point_cloud.x - 1)**2 - 0.2 *
-               (point_cloud.y + 0.3)**2)) + 1) / 2
+    point_cloud['feat3'] = (1 + np.cos(
+        np.exp(
+            -1 + 2 * to01(
+                -0.3 * (point_cloud.x - 1)**2 - 0.2 * (point_cloud.y + 0.3)**2
+            )
+        ))
+    ) / 2
     hidden1 = (point_cloud.feat1 + point_cloud.feat2**2) / point_cloud.feat3
     hidden1 = (hidden1 - np.mean(hidden1)) / np.std(hidden1)
     hidden2 = np.log(1 + point_cloud.feat1**2) - np.sin(
         4 * np.pi * point_cloud.feat2)
     hidden2 = (hidden2 - np.mean(hidden2)) / np.std(hidden2)
 
-    point_cloud['neighbours'] = point_cloud.apply(lambda row: len(
+    neighbours = point_cloud.apply(lambda row: len(
         tree.query_ball_point(row[['x', 'y', 'z']], radius_influence)),
                                                   axis=1)
-    aux = (point_cloud['neighbours'] > 5).astype('int32')
+    aux = (neighbours > 5).astype('int32')
     point_cloud['class'] = np.round(
         sigmoid(0.5 * hidden1 + 0.2 * hidden2 + 3 * aux) +
         np.random.randn(num_points) * 0.1)

diff --git a/tests/test_fake.py b/tests/test_fake.py
@@ -17,21 +17,20 @@ def test_generate_data(tmp_path):
         azimuth_skip=90.0,
         elevations=[0, 30, 60],
     )
-    assert df.shape == (2**4, 12)
+    assert df.shape == (2**4, 11)
     assert df.range.min() >= 0.1
     assert df.range.max() <= 2.3
     assert sorted(df.azimuth.unique()) == [0.5, 90.5, 180.5, 270.5]
     assert sorted(df.elevation.unique()) == [0, 30, 60]
+    for feat in ['feat1', 'feat2', 'feat3']:
+        assert df[feat].min() >= 0
+        assert df[feat].max() <= 1
     assert (df.columns == [
         'range', 'azimuth', 'elevation', 'useless_feature', 'x', 'y', 'z',
-        'feat1', 'feat2', 'feat3', 'neighbours', 'class'
+        'feat1', 'feat2', 'feat3', 'class'
     ]).all()
     assert df.notna().all(axis=None)
     df2 = pd.read_csv(filename)
-    for col in df.columns:
-        if not df2[col].eq(df[col]).all():
-            print(col)
-            print(df[col] - df2[col])
 
     assert (np.abs(df2 - df) < 1e-12).all(
         axis=None)  # Round off error might occur saving the file!