cli: fitactivity remove aggregate limitation (#378)

muktihari · Aug 30, 2024 · b5f7b1a · b5f7b1a
1 parent 45dc8f0
commit b5f7b1a
Show file tree

Hide file tree

Showing 4 changed files with 1,341 additions and 233 deletions.
diff --git a/cmd/fitactivity/README.md b/cmd/fitactivity/README.md
@@ -1,6 +1,6 @@
 # FIT Activity CLI
 
-A program to combine multiple FIT activity files (\*.fit) and conceal its position (Lat & Long at specified distance). Available for download in [Release's Assets](https://github.com/muktihari/fit/releases).
+A program to combine multiple FIT (\*.fit) activity files into one continuous activity and conceal its position (Lat & Long at specified distance) for privacy. Available for download in [Release's Assets](https://github.com/muktihari/fit/releases).
 
 TLDR: [Usage](#Usage)
 
@@ -19,48 +19,32 @@ Strava Specification: [https://developers.strava.com/docs/uploads](https://devel
 ## How We Combine
 
 First, we will order the files by `FileId.TimeCreated`.
-The first file will be the base for the resulting file and we will combine these following messages from the next FIT files into the resulting file:
+The first file will be the base for the resulting file and we will combine all messages from the next FIT files into the resulting file except: **FileId**, **FileCreator**, **Activity**.
 
-- Session: combine session by calculating some fields (list fields will be shown after this)
-- Record: field `distance` will be calculated before append, the rest will be appended as it is
+The common messages in an Activity File:
+
+- Activity: we use activity message from first FIT file then update it accordingly.
+- Session: fields will be aggregated with the corresponding session of the next FIT file.
+- Lap: append as it is.
 - Event: append as it is
-- Lap: field `start_position_lat`, `start_position_long`, `end_position_lat`, and `end_position_long` will be removed only if conceal option is specified, the rest will be appended as it is.
-- SplitSummary: combine split summary only if it has the same `split_type`.
-
-  Why lap positions must be removed? GPS Positions saved in lap messages can be vary, user may set new lap every 500m or new lap every 1 hour for example, we don't know the exact distance for each lap. If user want to conceal 1km, we need to find all laps within the conceal distance and decide whether to remove it or change it with new positions, this will add complexity. So, let's just remove it for now, if our upload target is Strava, they don't specify positions in lap message anyway.
-
-Other messages from the next FIT files will be appended as it is except **FileId** and **FileCreator**.
-
-### Calculated Session Fields:
-
-Currently we only care these following session fields:
-
-- sport (is used to match two sessions)
-- sub_sport (is not used since different devices may have different value)
-- start_time (is used to calculate time gap between two sessions, add time gap to total_elapsed_time)
-- end_position_lat (will be replaced with next files session's end_position_lat)
-- end_position_long (will be replaced with next files session's end_position_long)
-- total_elapsed_time
-- total_timer_time
-- total_distance
-- total_ascent
-- total_descent
-- total_cycles
-- total_calories
-- avg_speed
-- max_speed
-- avg_heart_rate
-- max_heart_rate
-- avg_cadence
-- max_cadence
-- avg_power
-- max_power
-- avg_temperature
-- max_temperature
-- avg_altitude
-- max_altitude
-
-### Combine process
+- Record: field `distance` will be accumulated before append, the rest will be appended as it is
+- SplitSummary: fields will be aggregated with the split summary of the next FIT file that has the same `split_type`.
+
+The rest of the messages from the next FIT files will be appended as they are.
+
+### Aggregating Fields:
+
+We will aggregate fields depending on the prefix and suffix of the field name:
+
+- prefix **'total'**: sum of the two values. (e.g. **total_elapsed_time**)
+- prefix **'num'** and suffix **'s'**: sum of the two values. (e.g. **num_splits**)
+- prefix **'max'**: max value between two values. (e.g. **max_heart_rate**)
+- prefix **'min'**: min value between two values. (e.g. **min_cadence**)
+- prefix **'avg'**: average of the two values. (e.g. **avg_temperature**)
+
+Otherwise, a field in the resulting file is assigned the value of the corresponding field from the next FIT file only if its current value is invalid.
+
+### The process
 
 We will combine last session group (include record, event, and lap) of the first file with the first session group of the next file (and so on).
 
@@ -90,6 +74,8 @@ _NOTE: Combining FIT activity files is NOT the same as merging multiple files in
 1. Conceal End Position
    We will backward-iterate from the end of the FIT messages up to the desired conceal distance and for every record found, we will remove the `position_lat` and `position_long` fields. And also, we will update the corresponding session fields: `end_position_lat` and `end_position_long`.
 
+We will remove `start_position_lat`, `start_position_long`, `end_position_lat`, and `end_position_long` fields from Laps. But why? GPS positions saved in lap messages can vary: the user may set a new lap every 500m or a new lap every 1 hour, for example, so we don't know the exact distance for each lap. If the user wants to conceal 1km, we need to find all laps within the conceal distance and decide whether to remove them or replace them with new positions, which adds complexity. So, let's just remove them for now; if our upload target is Strava, they don't specify positions in lap messages anyway.
+
 ## Build or Install
 
 _Prerequisite: Install golang: [https://go.dev/doc/install](https://go.dev/doc/install)_

diff --git a/cmd/fitactivity/aggregator/aggregator.go b/cmd/fitactivity/aggregator/aggregator.go
@@ -0,0 +1,300 @@
+package aggregator
+
+import (
+	"math"
+	"reflect"
+	"strings"
+	"time"
+
+	"github.com/muktihari/fit/profile/basetype"
+)
+
+// Aggregate aggregates src and dst into dst using reflection where T
+// should be a pointer to a struct, otherwise, it panics.
+// A nil src is a no-op since there is nothing to aggregate.
+//
+// Unexported fields are skipped. A field that is a struct, or a non-nil
+// pointer to a struct, (other than time.Time itself) is aggregated
+// recursively in place. For every other field, the logic depends on the
+// Field's Name; If it:
+//   - starts with "Total": sum of the two values.
+//   - starts with "Num" and ends with "s": sum of the two values.
+//   - starts with "Max": max value between the two values.
+//   - starts with "Min": min value between the two values.
+//   - starts with "Avg": avg of the two values.
+//   - Otherwise: fill with src's Value only if dst's Value is invalid.
+func Aggregate[T any](dst, src T) {
+	aggregateStruct(
+		reflect.Indirect(reflect.ValueOf(dst)),
+		reflect.Indirect(reflect.ValueOf(src)),
+	)
+}
+
+// aggregateStruct aggregates the exported fields of struct sv into struct dv.
+// dv must be addressable (e.g. obtained by dereferencing a pointer) so that
+// its fields are settable.
+func aggregateStruct(dv, sv reflect.Value) {
+	if !sv.IsValid() {
+		return // src was a nil pointer: nothing to aggregate.
+	}
+	timeType := reflect.TypeOf(time.Time{})
+	for i := 0; i < dv.NumField(); i++ {
+		f := dv.Type().Field(i)
+		if !f.IsExported() {
+			continue
+		}
+		df, sf := dv.Field(i), sv.Field(i)
+		if f.Type != timeType && reflect.Indirect(df).Kind() == reflect.Struct {
+			// Recurse on the field's own reflect.Value instead of going through
+			// df.Interface(): Interface() copies a struct-typed field and the
+			// copy is not addressable, so setting its fields would panic.
+			aggregateStruct(reflect.Indirect(df), reflect.Indirect(sf))
+			continue
+		}
+		switch {
+		case strings.HasPrefix(f.Name, "Total"):
+			sum(df, sf) // TotalElapsedTime, TotalCycles, etc.
+		case strings.HasPrefix(f.Name, "Num") && strings.HasSuffix(f.Name, "s"):
+			sum(df, sf) // NumSessions, NumLaps, NumSplits, etc.
+		case strings.HasPrefix(f.Name, "Max"):
+			max(df, sf) // MaxHeartRate, MaxCadence, etc.
+		case strings.HasPrefix(f.Name, "Min"):
+			min(df, sf) // MinHeartRate, MinCadence, etc.
+		case strings.HasPrefix(f.Name, "Avg"):
+			avg(df, sf) // AvgHeartRate, AvgCadence, etc.
+		default:
+			fill(df, sf) // Timestamp, Sport, Event, etc.
+		}
+	}
+}
+
+// sInvalids and uInvalids are lookup tables indexed by reflect.Kind that hold
+// the invalid sentinel of each signed and unsigned integer kind, widened to
+// int64 and uint64 so they can be compared with reflect.Value.Int and
+// reflect.Value.Uint respectively.
+var (
+	sInvalids = [...]int64{
+		reflect.Int8:  int64(basetype.Sint8Invalid),
+		reflect.Int16: int64(basetype.Sint16Invalid),
+		reflect.Int32: int64(basetype.Sint32Invalid),
+		reflect.Int64: int64(basetype.Sint64Invalid),
+	}
+
+	uInvalids = [...]uint64{
+		reflect.Uint8:  uint64(basetype.Uint8Invalid),
+		reflect.Uint16: uint64(basetype.Uint16Invalid),
+		reflect.Uint32: uint64(basetype.Uint32Invalid),
+		reflect.Uint64: uint64(basetype.Uint64Invalid),
+	}
+)
+
+// sum stores dst+src into dst. When only src is valid, dst takes src's value;
+// when src is invalid, dst is left untouched. Integers are invalid when equal
+// to their kind's invalid sentinel, floats when NaN. Slices and arrays are
+// summed element by element; extra trailing elements of a longer src slice
+// are appended to dst. Other kinds are ignored.
+func sum(dst, src reflect.Value) {
+	switch kind := dst.Kind(); kind {
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		invalid := sInvalids[kind]
+		a, b := dst.Int(), src.Int()
+		switch {
+		case b == invalid:
+			// Nothing to add.
+		case a == invalid:
+			dst.Set(src)
+		default:
+			dst.SetInt(a + b)
+		}
+	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		invalid := uInvalids[kind]
+		a, b := dst.Uint(), src.Uint()
+		switch {
+		case b == invalid:
+			// Nothing to add.
+		case a == invalid:
+			dst.Set(src)
+		default:
+			dst.SetUint(a + b)
+		}
+	case reflect.Float32, reflect.Float64:
+		a, b := dst.Float(), src.Float()
+		switch {
+		case math.IsNaN(b):
+			// Nothing to add.
+		case math.IsNaN(a):
+			dst.Set(src)
+		default:
+			dst.SetFloat(a + b)
+		}
+	case reflect.Slice:
+		dstLen, srcLen := dst.Len(), src.Len()
+		// Combine the overlapping prefix first.
+		for i := 0; i < dstLen && i < srcLen; i++ {
+			sum(dst.Index(i), src.Index(i))
+		}
+		// Then carry over whatever src has beyond dst's length.
+		if dstLen < srcLen {
+			dst.Set(reflect.AppendSlice(dst, src.Slice(dstLen, srcLen)))
+		}
+	case reflect.Array:
+		for i, n := 0, dst.Len(); i < n; i++ {
+			sum(dst.Index(i), src.Index(i))
+		}
+	}
+}
+
+// max stores the larger of dst and src into dst. When only src is valid, dst
+// takes src's value; when src is invalid, dst is left untouched. Integers are
+// invalid when equal to their kind's invalid sentinel, floats when NaN.
+// Slices and arrays are compared element by element; extra trailing elements
+// of a longer src slice are appended to dst. Other kinds are ignored.
+func max(dst, src reflect.Value) {
+	switch kind := dst.Kind(); kind {
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		invalid := sInvalids[kind]
+		a, b := dst.Int(), src.Int()
+		if b != invalid && (a == invalid || a < b) {
+			dst.Set(src)
+		}
+	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		invalid := uInvalids[kind]
+		a, b := dst.Uint(), src.Uint()
+		if b != invalid && (a == invalid || a < b) {
+			dst.Set(src)
+		}
+	case reflect.Float32, reflect.Float64:
+		a, b := dst.Float(), src.Float()
+		if !math.IsNaN(b) && (math.IsNaN(a) || a < b) {
+			dst.Set(src)
+		}
+	case reflect.Slice:
+		dstLen, srcLen := dst.Len(), src.Len()
+		// Combine the overlapping prefix first.
+		for i := 0; i < dstLen && i < srcLen; i++ {
+			max(dst.Index(i), src.Index(i))
+		}
+		// Then carry over whatever src has beyond dst's length.
+		if dstLen < srcLen {
+			dst.Set(reflect.AppendSlice(dst, src.Slice(dstLen, srcLen)))
+		}
+	case reflect.Array:
+		for i, n := 0, dst.Len(); i < n; i++ {
+			max(dst.Index(i), src.Index(i))
+		}
+	}
+}
+
+// min stores the smaller of dst and src into dst. When only src is valid, dst
+// takes src's value; when src is invalid, dst is left untouched. Integers are
+// invalid when equal to their kind's invalid sentinel, floats when NaN.
+// Slices and arrays are compared element by element; extra trailing elements
+// of a longer src slice are appended to dst. Other kinds are ignored.
+func min(dst, src reflect.Value) {
+	switch kind := dst.Kind(); kind {
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		invalid := sInvalids[kind]
+		a, b := dst.Int(), src.Int()
+		if b != invalid && (a == invalid || a > b) {
+			dst.Set(src)
+		}
+	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		invalid := uInvalids[kind]
+		a, b := dst.Uint(), src.Uint()
+		if b != invalid && (a == invalid || a > b) {
+			dst.Set(src)
+		}
+	case reflect.Float32, reflect.Float64:
+		a, b := dst.Float(), src.Float()
+		if !math.IsNaN(b) && (math.IsNaN(a) || a > b) {
+			dst.Set(src)
+		}
+	case reflect.Slice:
+		dstLen, srcLen := dst.Len(), src.Len()
+		// Combine the overlapping prefix first.
+		for i := 0; i < dstLen && i < srcLen; i++ {
+			min(dst.Index(i), src.Index(i))
+		}
+		// Then carry over whatever src has beyond dst's length.
+		if dstLen < srcLen {
+			dst.Set(reflect.AppendSlice(dst, src.Slice(dstLen, srcLen)))
+		}
+	case reflect.Array:
+		for i, n := 0, dst.Len(); i < n; i++ {
+			min(dst.Index(i), src.Index(i))
+		}
+	}
+}
+
+// avg stores the arithmetic mean of dst and src, (dst+src)/2, into dst. When
+// only src is valid, dst takes src's value; when src is invalid, dst is left
+// untouched. Integers are invalid when equal to their kind's invalid
+// sentinel, floats when NaN. Slices and arrays are averaged element by
+// element; extra trailing elements of a longer src slice are appended to dst.
+// Other kinds are ignored.
+func avg(dst, src reflect.Value) {
+	switch kind := dst.Kind(); kind {
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		invalid := sInvalids[kind]
+		a, b := dst.Int(), src.Int()
+		switch {
+		case b == invalid:
+			// Nothing to average with.
+		case a == invalid:
+			dst.Set(src)
+		default:
+			dst.SetInt((a + b) / 2)
+		}
+	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		invalid := uInvalids[kind]
+		a, b := dst.Uint(), src.Uint()
+		switch {
+		case b == invalid:
+			// Nothing to average with.
+		case a == invalid:
+			dst.Set(src)
+		default:
+			dst.SetUint((a + b) / 2)
+		}
+	case reflect.Float32, reflect.Float64:
+		a, b := dst.Float(), src.Float()
+		switch {
+		case math.IsNaN(b):
+			// Nothing to average with.
+		case math.IsNaN(a):
+			dst.Set(src)
+		default:
+			dst.SetFloat((a + b) / 2)
+		}
+	case reflect.Slice:
+		dstLen, srcLen := dst.Len(), src.Len()
+		// Combine the overlapping prefix first.
+		for i := 0; i < dstLen && i < srcLen; i++ {
+			avg(dst.Index(i), src.Index(i))
+		}
+		// Then carry over whatever src has beyond dst's length.
+		if dstLen < srcLen {
+			dst.Set(reflect.AppendSlice(dst, src.Slice(dstLen, srcLen)))
+		}
+	case reflect.Array:
+		for i, n := 0, dst.Len(); i < n; i++ {
+			avg(dst.Index(i), src.Index(i))
+		}
+	}
+}
+
+func fill(dst, src reflect.Value) {
+	switch k := dst.Kind(); k {
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		if dst.Int() == sInvalids[k] {
+			dst.Set(src)
+		}
+	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		if dst.Uint() == uInvalids[k] {
+			dst.Set(src)
+		}
+	case reflect.Float32, reflect.Float64:
+		if math.IsNaN(dst.Float()) {
+			dst.Set(src)
+		}
+	case reflect.String:
+		if dst.String() == basetype.StringInvalid || dst.String() == "\x00" {
+			dst.Set(src)
+		}
+	case reflect.Bool:
+		if !dst.Bool() {
+			dst.Set(src)
+		}
+	case reflect.Slice:
+		if dst.Len() >= src.Len() {
+			for i := 0; i < dst.Len(); i++ {
+				if i >= src.Len() {
+					break
+				}
+				fill(dst.Index(i), src.Index(i))
+			}
+		} else {
+			for i := 0; i < src.Len(); i++ {
+				if i >= dst.Len() {
+					dst.Set(reflect.AppendSlice(dst, src.Slice(i, src.Len())))
+					break
+				}
+				fill(dst.Index(i), src.Index(i))
+			}
+		}
+	case reflect.Array:
+		for i := 0; i < dst.Len(); i++ {
+			fill(dst.Index(i), src.Index(i))
+		}
+	case reflect.Struct:
+		if dst.IsZero() && dst.Type() == reflect.TypeOf(time.Time{}) {
+			dst.Set(src)
+		}
+	}
+}