Skip to content

Commit

Permalink
Add back the waterfall chart with several improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
dyang415 committed Oct 7, 2023
1 parent 4fd4b5c commit 02e807d
Show file tree
Hide file tree
Showing 8 changed files with 588 additions and 67 deletions.
28 changes: 27 additions & 1 deletion backend/app/insight/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from app.insight.datasource.bqMetrics import BqMetrics
from app.insight.services.insight_builders import DFBasedInsightBuilder
from app.insight.services.metrics import AggregateMethod, SingleColumnMetric, DualColumnMetric, CombineMethod, DimensionValuePair, Filter
from app.insight.services.segment_insight_builder import get_related_segments, get_segment_insight
from app.insight.services.segment_insight_builder import get_related_segments, get_segment_insight, get_waterfall_insight
from app.insight.services.utils import load_df_from_csv


Expand Down Expand Up @@ -175,6 +175,32 @@ def get_related_segments(self):
)
)

@expose('file/waterfall-insight', methods=['POST'])
def get_waterfall_insight(self):
def _build_dimension_value_pair(segment_key):
return [DimensionValuePair(key_component['dimension'], key_component['value']) for key_component in segment_key]

data = request.get_json()

(baseline_start, baseline_end, comparison_start, comparison_end, date_column, date_column_type) = self.parse_date_info(data)
metric_column = data['metricColumn']
metric = self.parse_metrics(metric_column)
filters = self.parse_filters(data)

file_id = data['fileId']
logger.info('Reading file')
df = load_df_from_csv(f'/tmp/dsensei/{file_id}', date_column) \
.with_columns(pl.col(date_column).cast(pl.Utf8).str.slice(0, 10).str.to_date().alias("date"))

return orjson.dumps(get_waterfall_insight(
df,
(baseline_start, baseline_end),
(comparison_start, comparison_end),
[_build_dimension_value_pair(segment_key) for segment_key in data['segmentKeys']],
metric,
filters
))

@expose('file/metric', methods=['POST'])
def get_insight(self):
data = request.get_json()
Expand Down
38 changes: 38 additions & 0 deletions backend/app/insight/services/segment_insight_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,44 @@ def get_related_segments(
}


def get_waterfall_insight(
        df: pl.DataFrame,
        baseline_date_range: Tuple[datetime.date, datetime.date],
        comparison_date_range: Tuple[datetime.date, datetime.date],
        segment_keys: list[list[DimensionValuePair]],
        metric: Metric,
        filters: list[Filter],
) -> dict:
    """Compute each segment's metric change without double-counting rows.

    Segments are processed in the given order; after a segment is measured
    its rows are removed from the working frame, so later segments only see
    the remainder ("no overlap").

    Args:
        df: Source frame containing every dimension column referenced by
            segment_keys plus the columns the metric aggregates.
        baseline_date_range: (start, end) dates of the baseline period.
        comparison_date_range: (start, end) dates of the comparison period.
        segment_keys: One segment per entry; the dimension/value pairs of a
            segment are AND-ed together.
        metric: Metric aggregated via build_base_df.
        filters: Global filters applied once, before any segment is measured.

    Returns:
        Mapping from serialized segment key ("dim:value|dim:value") to
        {"changeWithNoOverlap": comparison_value - baseline_value}.
    """
    df = df.filter(get_filter_expression(filters))

    result = {}
    for segment_key in segment_keys:
        # AND one equality predicate per (dimension, value) pair; values are
        # compared as strings to match how keys arrive from the frontend.
        filtering_clause = pl.lit(True)
        for sub_key in segment_key:
            filtering_clause = filtering_clause & (pl.col(
                sub_key.dimension).cast(str).eq(pl.lit(sub_key.value)))

        current_df = df.filter(filtering_clause)

        dimensions = [sub_key.dimension for sub_key in segment_key]
        # current_df is already restricted to a single value per dimension,
        # so build_base_df yields at most one aggregate row; an empty frame
        # means the segment has no rows in that period and contributes 0.
        # Use .height (O(1)) instead of len(df.rows()), which materializes
        # the whole frame as Python tuples just to test emptiness.
        baseline_df = build_base_df(current_df, baseline_date_range, dimensions, [metric])
        baseline_value = baseline_df.row(0, named=True)[metric.get_id()] if baseline_df.height > 0 else 0

        comparison_df = build_base_df(current_df, comparison_date_range, dimensions, [metric])
        comparison_value = comparison_df.row(0, named=True)[metric.get_id()] if comparison_df.height > 0 else 0

        serialized_key = "|".join(
            f"{sub_key.dimension}:{sub_key.value}" for sub_key in segment_key
        )
        result[serialized_key] = {
            "changeWithNoOverlap": comparison_value - baseline_value
        }

        # Remove this segment's rows so subsequent segments are measured on
        # the remainder only — this is the "no overlap" guarantee.
        df = df.filter(filtering_clause.is_not())

    return result


def map_to_segment_info(row, baseline_count: int, comparison_count: int, metric: Metric):
values = row["dimension_value"]
dimensions = row['dimension_name']
Expand Down
6 changes: 4 additions & 2 deletions backend/app/insight/services/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,13 @@ def get_filter_expression(filters: list[Filter]) -> Expr:
return filter_expr


def load_df_from_csv(path: str):
def load_df_from_csv(path: str, date_column: str = None):
df = pl.read_csv(path, try_parse_dates=True)
for column_and_d_type in zip(df.columns, df.dtypes):
[column, d_type] = column_and_d_type
if date_column is not None and column != date_column:
continue

if d_type == pl.Utf8:
non_null_count = df.filter(pl.col(column).str.lengths().gt(0) & pl.col(column).is_not_null()).select(pl.col(column).count()).row(0)[0]
if non_null_count > 0:
Expand All @@ -96,6 +99,5 @@ def load_df_from_csv(path: str):
pl.col(column).str.to_date("%-m/%-d/%y %k:%M").alias(column)
)
except:
print(column)
pass
return df
42 changes: 37 additions & 5 deletions frontend/src/components/main-dashboard/MainDashboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,13 @@ import { MetricOverviewTable } from "./MetricOverviewTable";
import { SidebarReportConfig } from "./SidebarReportConfig";
import TopDimensionSlicesTable from "./TopDimensionSlicesTable";
import { DimensionSliceDetailModal } from "./dimention-slice-detail-modal/DimentionSliceDetailModal";
import { WaterfallPanel } from "./waterfall/WaterfallPanel";

enum ReportingType {
OVERVIEW,
TOP_SEGMENTS,
SEGMENTS_BY_DIMENSIONS,
WATERFALL,
}

interface RouterState {
Expand Down Expand Up @@ -111,8 +113,8 @@ export default function MainDashboard() {
tableRowStatusByDimension,
groupRows,
isLoading,
error,
waterfallRows,
error,
} = useSelector((state: RootState) => state.comparisonInsight);

const navigate = useNavigate();
Expand Down Expand Up @@ -204,6 +206,19 @@ export default function MainDashboard() {
key="segments-by-dimensions"
/>,
];
if (!metricColumn.ratioMetric) {
reportMenuElements.push(
<SidebarElement
text="Waterfall Chart"
icon={Squares2X2Icon}
isSelected={reportingType === ReportingType.WATERFALL}
onClick={() => {
setReportingType(ReportingType.WATERFALL);
}}
key="segments-by-dimensions"
/>
);
}
}
}

Expand Down Expand Up @@ -588,13 +603,29 @@ export default function MainDashboard() {
showCalculationMode={true}
targetDirection={targetDirection}
onReRunOnSegment={onReRunOnSegment}
showSensitiveControl={true}
/>
</Flex>
)}
{reportingType === ReportingType.WATERFALL && (
<Flex className="gap-y-4 pt-10" flexDirection="col">
<Title>Waterfall View of Top Segments</Title>
<Divider />
<WaterfallPanel
waterfallRows={waterfallRows}
metric={analyzingMetrics}
targetDirection="increasing"
fileId={fileId!}
filters={filters}
dateColumn={dateColumn}
metricColumn={metricColumn}
baseDateRange={dateRangeData.baseDateRangeData.range}
comparisonDateRange={dateRangeData.comparisonDateRangeData.range}
dataSourceType={dataSourceType}
onReRunOnSegment={onReRunOnSegment}
/>
</Flex>
)}
{/* <WaterfallPanel
waterfallRows={waterfallRows}
metric={analyzingMetrics}
/> */}
{reportingType === ReportingType.SEGMENTS_BY_DIMENSIONS && (
<Flex className="gap-y-4 pt-10" flexDirection="col">
<Title>Top Segments by Dimension</Title>
Expand All @@ -610,6 +641,7 @@ export default function MainDashboard() {
maxDefaultRows={5}
showDimensionSelector={false}
showCalculationMode={false}
showSensitiveControl={false}
title={
<>
<Title>Dimension: {dimension.name}</Title>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type Props = {
showCalculationMode: boolean;
targetDirection: TargetDirection;
onReRunOnSegment: (key: DimensionSliceKey) => void;
showSensitiveControl: boolean;
};

export default function TopDimensionSlicesTable({
Expand All @@ -54,6 +55,7 @@ export default function TopDimensionSlicesTable({
showCalculationMode,
targetDirection,
onReRunOnSegment,
showSensitiveControl,
}: Props) {
const dispatch = useDispatch();
const [isCollapsed, setIsCollapse] = useState(true);
Expand Down Expand Up @@ -124,7 +126,7 @@ export default function TopDimensionSlicesTable({
/>
</>
)}
{mode === "outlier" && (
{mode === "outlier" && showSensitiveControl && (
<>
<Text>Sensitivity:</Text>
<Select
Expand Down
Loading

0 comments on commit 02e807d

Please sign in to comment.