Skip to content

Commit

Permalink
Merge pull request #85 from adamtornhill/84-support-custom-temporal-p…
Browse files Browse the repository at this point in the history
…eriod

Support custom temporal period
  • Loading branch information
adamtornhill authored Oct 24, 2022
2 parents c8ffe12 + 4829229 commit 4297037
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 71 deletions.
1 change: 1 addition & 0 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
[clj-time "0.9.0"]
[org.clojure/math.numeric-tower "0.0.4"]
[org.clojure/math.combinatorics "0.1.1"]
[medley "1.4.0"]
[semantic-csv "0.2.1-alpha1"]
[instaparse "1.4.1"]]
:main code-maat.cmd-line
Expand Down
7 changes: 4 additions & 3 deletions src/code_maat/app/app.clj
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,10 @@
the given temporal period. Allows the user to treat
all commits during one day as a single, logical change set.
NOTE: will probably not work with author's analyses!!!"
[options commits]
(if-let [time-period (:temporal-period options)]
(time-grouper/run commits time-period)
[{:keys [temporal-period] :as options}
commits]
(if temporal-period
(time-grouper/by-time-period commits options)
commits))

(defn- aggregate-authors-in-teams
Expand Down
139 changes: 115 additions & 24 deletions src/code_maat/app/time_based_grouper.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
;;; Copyright (C) 2014 Adam Tornhill
;;;

(ns code-maat.app.time-based-grouper)
(ns code-maat.app.time-based-grouper
(:require [clj-time.core :as t]
[clj-time.format :as tf]
[clj-time.periodic :as time-period]
[clj-time.core :as tc]
[medley.core :as m]))

;;; Sometimes we'd like to use a different temporal window than
;;; the commit. For example, when multiple teams are involved
Expand All @@ -11,30 +16,116 @@
;;; To remove these biases we use this module to re-group all
;;; changes according to a given time window before analysis.
;;;
;;; LIMITATION: At the moment we only support grouping commits that
;;; occour within the same day. This is because I could implement
;;; that aggregation easily. I plan to extend Code Maat with
;;; support for arbitrary temporal windows.

(defn- date-as-commit-id
[commit]
(let [date (:date commit)]
(update-in commit [:rev] (fn [_old] date))))

(defn- throw-on-invalid
[time-period]
(when (not (= "1" time-period)) ; Let's support more in the future...
(throw
(IllegalArgumentException.
(str "Invalid time-period: the current version only supports one (1) day")))))

(defn run
;;; Grouping commits by time involves a sliding window over the
;;; original commits. This means that logically, the same physical commit
;;; can be counted multiple times since it overlaps with several slides
;;; of the window. This works well for change coupling but not hotspots.
;;; Hence, the validation ensures it's a supported analysis before
;;; applying the filter.

(defn- string->date
[s]
(tf/parse (tf/formatters :year-month-day) s))

(defn date->string
[d]
(tf/unparse (tf/formatters :year-month-day) d))

(defn- date-of
[cs]
(some-> cs first :date string->date))

(defn- daily-dates-between
"Create a range of DateTime objects where each date represens one day."
[start end]
(let [feeding-range (time-period/periodic-seq start (tc/days 1))
end-condition-date (tc/plus end (tc/days 1))
full-range? (fn [current-date] (t/before? current-date end-condition-date))]
(take-while full-range? feeding-range)))

(defn- pad-commits-to-complete-time-series
"There are probably many days which don't have any commits.
This functions pads up those days with empty commit sets. That way, we can
partition over the sequence and easily create the sliding window commit set."
[commits]
(let [commits-ascending (sort-by :date commits)
first-commit-date (date-of commits-ascending)
last-commit-date (date-of (reverse commits-ascending))
commits-on-non-active-days []]
(reduce (fn [acc date-in-range]
(let [as-date (date->string date-in-range)
commits-on-day (get acc as-date commits-on-non-active-days)]
(assoc acc as-date commits-on-day)))
(group-by :date commits)
(daily-dates-between first-commit-date last-commit-date))))

(defn- drop-date-key
"We used group-by to get commits by date. Now, drop the key so that
only the commits remain."
[grouped-commits]
(map second grouped-commits))

(defn- remove-empty-windows
"Not all dates have commit activity."
[commits-within-sliding-windows]
(remove (fn [cs]
(every? empty? cs))
commits-within-sliding-windows))

(defn- adjust-revision-to
"The edge case is that the same file should only be included once, so
let's filter out duplicates."
[new-rev cs]
(->> cs
(map (fn [c]
(assoc c :rev new-rev)))
(m/distinct-by :entity)))

(defn- combine-commits-to-logical-changesets
[commits-within-sliding-windows]
(mapcat (fn [commits-in-window]
(let [cs (reduce (partial into) commits-in-window)
latest-day (->> cs (sort-by :date) reverse first :date)]
(adjust-revision-to latest-day cs)))
commits-within-sliding-windows))

(defn- combine-sliding-commits
"After partitioning commits according to the sliding window, we
need to deliver a flat sequence where each commit group in the window
represents a logical commitset."
[commits-within-sliding-windows]
(->> commits-within-sliding-windows
remove-empty-windows
combine-commits-to-logical-changesets))

(defn- partition-commits-into-sliding-periods-of
[time-period padded-cs]
(->> padded-cs
(sort-by first)
drop-date-key
(partition time-period 1)))

(defn- commits->sliding-window-seq
[time-period cs]
(->> cs
pad-commits-to-complete-time-series
(partition-commits-into-sliding-periods-of time-period)
combine-sliding-commits))

(defn- validated-time-period-from
[{:keys [temporal-period] :as _options}]
(if (re-matches #"\d+" temporal-period)
(int (Double/parseDouble temporal-period))
(throw (IllegalArgumentException.
(str "Invalid time-period: the given value '" temporal-period "' is not an integer.")))))

(defn by-time-period
"Alright, this is a hack: we just set the commit ID to
the current date. That makes the rest of the analyses treat
our faked grouping as beloning to the same change set."
([raw-data]
(run raw-data "1"))
([raw-data time-period]
(throw-on-invalid time-period)
(map date-as-commit-id raw-data)))
[cs options]
(let [time-period (validated-time-period-from options)]
(if (seq cs)
(commits->sliding-window-seq time-period cs)
cs)))

2 changes: 1 addition & 1 deletion src/code_maat/cmd_line.clj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
:default 30 :parse-fn #(Integer/parseInt %)]
["-e" "--expression-to-match MATCH-EXPRESSION" "A regex to match against commit messages. Used with -messages analyses"]
["-t" "--temporal-period TEMPORAL-PERIOD"
"Instructs Code Maat to consider all commits during the same day as a single, logical commit"]
"Used for coupling analyses. Instructs Code Maat to consider all commits during the rolling temporal period as a single, logical commit set"]
["-d" "--age-time-now AGE-TIME_NOW" "Specify a date as YYYY-MM-dd that counts as time zero when doing a code age analysis"]
["-h" "--help"]])

Expand Down
44 changes: 44 additions & 0 deletions test/code_maat/app/time_based_end_to_end_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
;;; Copyright (C) 2014 Adam Tornhill
;;;

(ns code-maat.app.time-based-end-to-end-test
(:require [code-maat.app.app :as app])
(:use [clojure.test]
[code-maat.tools.test-tools]))

;;; End-to-end tests to simulate a time-based analysis.
;;;
;;; The test data contains two commits done the same day.
;;; With the default options we'll treat them as separate.
;;; In a time-base analysis we consider them as a logical
;;; part of the same work.

(def ^:const log-file "./test/code_maat/app/day_coupled_entities_git.txt")

(def ^:const csv-options
{:version-control "git"
:analysis "coupling"
:min-revs 1
:min-shared-revs 1
:min-coupling 10
:max-coupling 100
:max-changeset-size 10})

(def ^:const csv-options-for-time-based
(merge csv-options {:temporal-period "1"}))

(deftest only-calculates-coupling-within-same-commit-by-default
(is (= (run-with-str-output log-file csv-options)
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))

(deftest calculates-coupling-within-same-day
(is (= (run-with-str-output log-file csv-options-for-time-based)
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n/Infrastrucure/Network/Connection.cs,/Infrastrucure/Network/TcpConnection.cs,100,1\n/Infrastrucure/Network/TcpConnection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))

(def ^:const options-with-invalid-time-period
(merge csv-options {:temporal-period "not a number"}))

(deftest throws-on-unsupported-time-periods
"We hope to support more options in the future."
(is (thrown? IllegalArgumentException
(run-with-str-output log-file options-with-invalid-time-period))))
91 changes: 48 additions & 43 deletions test/code_maat/app/time_based_grouper_test.clj
Original file line number Diff line number Diff line change
@@ -1,44 +1,49 @@
;;; Copyright (C) 2014 Adam Tornhill
;;;

(ns code-maat.app.time-based-grouper-test
(:require [code-maat.app.app :as app])
(:use [clojure.test]
[code-maat.tools.test-tools]))

;;; End-to-end tests to simulate a time-based analysis.
;;;
;;; The test data contains two commits done the same day.
;;; With the default options we'll treat them as separate.
;;; In a time-base analysis we consider them as a logical
;;; part of the same work.

(def ^:const log-file "./test/code_maat/app/day_coupled_entities_git.txt")

(def ^:const csv-options
{:version-control "git"
:analysis "coupling"
:min-revs 1
:min-shared-revs 1
:min-coupling 10
:max-coupling 100
:max-changeset-size 10})

(def ^:const csv-options-for-time-based
(merge csv-options {:temporal-period "1"}))

(deftest only-calculates-coupling-within-same-commit-by-default
(is (= (run-with-str-output log-file csv-options)
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))

(deftest calculates-coupling-within-same-day
(is (= (run-with-str-output log-file csv-options-for-time-based)
"entity,coupled,degree,average-revs\n/Infrastrucure/Network/Connection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n/Infrastrucure/Network/Connection.cs,/Infrastrucure/Network/TcpConnection.cs,100,1\n/Infrastrucure/Network/TcpConnection.cs,/Presentation/Status/ClientPresenter.cs,100,1\n")))

(def ^:const options-with-invalid-time-period
(merge csv-options {:temporal-period "2"}))

(deftest throws-on-unsupported-time-periods
"We hope to support more options in the future."
(is (thrown? IllegalArgumentException
(run-with-str-output log-file options-with-invalid-time-period))))
(:require [code-maat.app.time-based-grouper :as grouper])
(:use [clojure.test]))

(deftest commits-by-day
(testing "Expect a non-modifying operation"
(let [input-commits [{:entity "A" :rev 1 :date "2022-10-20"}
{:entity "B" :rev 2 :date "2022-10-20"}]]
(is (= [{:date "2022-10-20" :entity "A" :rev "2022-10-20"}
{:date "2022-10-20" :entity "B" :rev "2022-10-20"}]
(grouper/by-time-period input-commits {:temporal-period "1"}))))))

(deftest multiple-days-give-a-rolling-dataset
(let [input-commits [{:entity "A" :rev 1 :date "2022-10-20"}
{:entity "B" :rev 2 :date "2022-10-20"}

{:entity "B" :rev 3 :date "2022-10-19"} ; double entry, two B's when looking at last two days
{:entity "D" :rev 3 :date "2022-10-19"}

{:entity "C" :rev 4 :date "2022-10-18"}
{:entity "D" :rev 4 :date "2022-10-18"}

{:entity "D" :rev 5 :date "2022-10-15"}]] ; a gap in days between the commits
(is (= [
; Only commits on 2022-10-15, not on subsequent day:
{:date "2022-10-15" :entity "D" :rev "2022-10-15"}

; 17-18th
{:date "2022-10-18" :entity "C" :rev "2022-10-18"}
{:date "2022-10-18" :entity "D" :rev "2022-10-18"}

; 18-19th
{:date "2022-10-18" :entity "C" :rev "2022-10-19"}
{:date "2022-10-18" :entity "D" :rev "2022-10-19"}
{:date "2022-10-19" :entity "B" :rev "2022-10-19"}

; 19-20th
{:date "2022-10-19" :entity "B" :rev "2022-10-20"}
{:date "2022-10-19" :entity "D" :rev "2022-10-20"}
{:date "2022-10-20" :entity "A" :rev "2022-10-20"}]
(grouper/by-time-period input-commits {:temporal-period "2"})))))

(deftest edge-cases
(testing "Works on an empty input sequence, ie. no commits"
(is (= []
(grouper/by-time-period [] {:temporal-period "2"}))))
(testing "Works on a single commit"
(is (= [{:date "2022-10-19" :entity "B" :rev "2022-10-19"}]
(grouper/by-time-period [{:entity "B" :rev 3 :date "2022-10-19"}] {:temporal-period "1"})))))

0 comments on commit 4297037

Please sign in to comment.