-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
101 lines (78 loc) · 3.12 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import pandas as pd
import streamlit as st
from io import StringIO
import zipfile
import plotly.express as px
# Path to the zip archive holding the price data; relative, so it is
# resolved against the process's current working directory.
zip_file_path = "data.zip"
# Page header: a title plus a one-line prompt introducing the filter
# widgets rendered further down the script.
st.title("Price Evolution Analysis")
st.write("Select the period of interest, time range, and sources:")
# Load data
@st.cache_data
def load_data(zip_file_path, chunk_size=10000):
    """Load the price CSV from a zip archive and append per-timestamp medians.

    Reads ``data.csv`` from the archive in chunks of ``chunk_size`` rows.
    For every chunk, "Block Timestamp" is parsed to datetimes, "Price" is
    divided by 100,000,000 and "Volume" is coerced to numeric (values that
    cannot be parsed become NaN).  After all chunks are combined, only
    timestamps that appear on at least 5 rows are kept.  Finally one row per
    remaining timestamp is appended with ``Source == "Median"`` and the
    median of that timestamp's prices; columns the median rows do not carry
    (e.g. "Volume") are NaN.

    Args:
        zip_file_path: Path to a zip archive that contains ``data.csv``.
        chunk_size: Number of CSV rows parsed per chunk.  It only affects
            parsing granularity, never the result.

    Returns:
        A DataFrame with the filtered input rows followed by the median rows,
        re-indexed from 0.

    Raises:
        KeyError: If the archive has no ``data.csv`` member or the CSV lacks
            one of the "Block Timestamp", "Price" or "Volume" columns.
    """
    min_rows_per_timestamp = 5
    price_divisor = 100000000

    chunks = []
    with zipfile.ZipFile(zip_file_path, "r") as zip_file:
        with zip_file.open("data.csv") as csv_file:
            # Feed pandas the archive member's byte stream directly so the CSV
            # is parsed incrementally instead of being read and decoded into
            # one in-memory string first.
            reader = pd.read_csv(csv_file, chunksize=chunk_size, encoding="utf-8")
            for chunk in reader:
                chunk["Block Timestamp"] = pd.to_datetime(chunk["Block Timestamp"])
                chunk["Price"] = chunk["Price"] / price_divisor
                chunk["Volume"] = pd.to_numeric(chunk["Volume"], errors="coerce")
                chunks.append(chunk)

    data = pd.concat(chunks, ignore_index=True)

    # Count rows per timestamp across the WHOLE dataset.  Doing this per chunk
    # would split any timestamp that straddles a chunk boundary into two
    # partial groups and wrongly discard it.
    rows_per_timestamp = data.groupby("Block Timestamp")["Price"].transform("size")
    data = data[rows_per_timestamp >= min_rows_per_timestamp]

    median_data = data.groupby("Block Timestamp")["Price"].median().reset_index()
    median_data["Source"] = "Median"
    return pd.concat([data, median_data], ignore_index=True)
# Load the dataset through the load_data function defined earlier in this file.
data = load_data(zip_file_path)

# The timestamp column drives both the widget defaults and the row filters
# below, so look it up once.
timestamps = data["Block Timestamp"]

# Date range pickers, defaulting to the full span covered by the data.
start_date = st.date_input("Start date", timestamps.min().date())
end_date = st.date_input("End date", timestamps.max().date())

# Hour-of-day window (inclusive on both ends), defaulting to the whole day.
start_hour = st.slider("Start hour", 0, 23, 0)
end_hour = st.slider("End hour", 0, 23, 23)

# Source picker; every source found in the data is selected by default.
all_sources = data["Source"].unique()
selected_sources = st.multiselect("Select sources", all_sources, default=all_sources)

if start_date > end_date:
    st.error("Error: End date must fall after start date.")
elif start_hour > end_hour:
    # Without this guard an inverted hour window matches no rows and the page
    # silently shows an empty chart and table.
    st.error("Error: End hour must not be earlier than start hour.")
else:
    # Keep rows whose calendar date, hour of day and source all fall inside
    # the selections made above.
    row_dates = timestamps.dt.date
    row_hours = timestamps.dt.hour
    filtered_data = data[
        (row_dates >= start_date)
        & (row_dates <= end_date)
        & (row_hours >= start_hour)
        & (row_hours <= end_hour)
        & (data["Source"].isin(selected_sources))
    ]

    # One line per source, price over time.
    fig = px.line(
        filtered_data,
        x="Block Timestamp",
        y="Price",
        color="Source",
        title="Price Evolution by Source",
    )
    fig.update_layout(
        xaxis_title="Timestamp", yaxis_title="Price (Divided by 100,000,000)"
    )
    st.plotly_chart(fig, use_container_width=True)

    # The same filtered rows as a table below the chart.
    st.write("### Data Table")
    st.dataframe(filtered_data)

    # Offer the filtered rows as a CSV download.
    csv = filtered_data.to_csv(index=False)
    st.download_button(
        label="Download data as CSV",
        data=csv,
        file_name="price_evolution_data.csv",
        mime="text/csv",
    )