Skip to content

Latest commit

 

History

History
516 lines (467 loc) · 12 KB

survey.md

File metadata and controls

516 lines (467 loc) · 12 KB
title marimo-version width layout_file
Survey
0.9.16
medium
layouts/survey.slides.json
import marimo as mo
import polars as pl
import altair as alt
import random
import datetime
# Various constants
TECHNICAL_COLUMNS = ["StartDate", "EndDate", "Status", "Progress", "Duration (in seconds)", "Finished", "RecordedDate", "ResponseId", "DistributionChannel", "UserLanguage", "GDPR"]

# Helpers
def list_to_md(list_of_things, title=None):
    text = f"## {title} \n" if title is not None else ""
    for thing in list_of_things:
        text += f" - {thing} \n"
    return mo.md(text)

Analysing Q2 group 2024 survey

survey_start = datetime.datetime(2024, 10, 15)

df = pl.read_csv("survey-results.csv").filter(
    pl.col("Status") != '{"ImportId":"status"}').filter(
    pl.col("StartDate").str.to_datetime("%Y-%m-%d %H:%M:%S", strict=False) >= survey_start
)
random.seed(1234) # to get the same lines every run
df.sample(50)

To get a more advanced feeling:

df.select(pl.all().exclude(TECHNICAL_COLUMNS)).describe()
answers = len(df)
finished = len(df.filter(pl.col("Finished") == "True"))
mo.md(f"""## Total answers: {answers}""")
ddd = pl.DataFrame({"Status": ["Unfinished","Finished"],"y": [answers-finished, finished]})
base = alt.Chart(ddd, title="Finished questionnaires ratio").mark_arc().encode(theta="y", color="Status")
pie = base.mark_arc(outerRadius=120)
text = base.mark_text(radius=140, size=20).encode(text="y:N")
pie + text
_chart = (
    alt.Chart(df)
    .transform_aggregate(count="count()", groupby=["b1\.q2"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b1\.q2", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b1\.q2", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Research Area", width="container")
)
_chart
_chart = (
    alt.Chart(df)
    .transform_aggregate(count="count()", groupby=["b1\.q2_8_TEXT"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b1\.q2_8_TEXT", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b1\.q2_8_TEXT", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Other research areas", width="container")
)
_chart
insts = df.select(
    pl.col("b1.q3").alias("Institution")
).filter(
    pl.col("Institution").is_not_null()
)

mo.ui.table(insts)
df.select(
    pl.col("b1.q4").str.to_lowercase().alias("Country")
).filter(
    pl.col("Country").is_not_null()
).group_by("Country").agg(
    pl.len().alias("count")
).sort("count", descending=True)
df_ddi_kl = df.select(
    pl.col("b2.q1_1").alias("Codebook"),
    pl.col("b2.q1_2").alias("Lifecycle"),
    pl.col("b2.q1_3").alias("CDI"),
).unpivot(["Codebook", "Lifecycle", "CDI"]).group_by(
    "variable", "value"
).len().sort("variable")

df_ddi_kl

alt.Chart(df_ddi_kl, title="Skills and knowledge for DDI products").mark_bar().encode(
    x="value:O",
    y="len:Q",
    color="value",
    column="variable"
)
mo.md("## Products used")
df_pu = df.select(
    pl.col("b2.q2").alias("Products")
).group_by(
    "Products"
).len().sort("Products", descending=True)
mo.ui.table(df_pu)
def mark(col_name, thing):
    return pl.when(
        pl.col(col_name).str.contains(thing)
    ).then(1).otherwise(0).alias(thing)

def markb2q3(thing):
    return mark("b2.q3", thing)

df.select(
    pl.col("b2.q3").alias("Lifecycle"),
    markb2q3("Concept"),
    markb2q3("Collection"),
    markb2q3("Processing"),
    markb2q3("Distribution"),
    markb2q3("Discovery"),
    markb2q3("Analysis"),
    markb2q3("Repurposing"),
    markb2q3("Archiving")
).unpivot(
    ["Concept", "Collection", "Processing", "Distribution", "Discovery", "Analysis", "Repurposing", "Archiving"]
).group_by(
    "variable"
).agg(
    pl.col("value").sum()
).sort("value", descending=True)
_chart = (
    alt.Chart(df, title="Datasets...")
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q4_1", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(width="container")
)
_chart
_chart = (
    alt.Chart(df, title="Variables...")
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q4_2", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(width="container")
)
_chart
_chart = (
    alt.Chart(df, title="Concepts...")
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q4_3", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(width="container")
)
_chart
_chart = (
    alt.Chart(df, title="Questions wording...")
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q4_4", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(width="container")
)
_chart
_chart = (
    alt.Chart(df, title="Responses & code lists...")
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q4_5", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(width="container")
)
_chart
_chart = (
    alt.Chart(df)
    .transform_aggregate(count="count()", groupby=["b2\.q5"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b2\.q5", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q5", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Questionnaire documentation with...", width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b2.q6").is_not_null()))
    .transform_aggregate(count="count()", groupby=["b2\.q6"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b2\.q6", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q6", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Survey tools", width="container")
)
_chart
_chart = (
    alt.Chart(df)
    .transform_aggregate(count="count()", groupby=["b2\.q6_7_TEXT"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b2\.q6_7_TEXT", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q6_7_TEXT", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Other survey tools", width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b2.q7").is_not_null()))
    .transform_aggregate(count="count()", groupby=["b2\.q7"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b2\.q7", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q7", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Documentation tools", width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b2.q7_4_TEXT").is_not_null()))
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q7_4_TEXT", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(title="Internal documentation tools",width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b2.q7_5_TEXT").is_not_null()))
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q7_5_TEXT", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(title="Other documentation tools ", width="container")
)
_chart
_chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q10", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(title="DDI satisfaction", width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b2.q11").is_not_null()))
    .transform_aggregate(count="count()", groupby=["b2\.q11"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b2\.q11", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b2\.q11", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Why?", width="container")
)
_chart
_chart = (
    alt.Chart(df.select(pl.col("b3.q1").alias("b3q1"))
              .filter(pl.col("b3q1").is_not_null()))
    .mark_bar()
    .encode(
        y=alt.Y("b3q1", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(
        title="Are you planning to document your questionnaires in DDI?", 
        width="container"
    )
)
_chart
qddiyes = df.filter(
    pl.col("b3.q1") == "Yes"
).select(
    pl.col("b3.q2")
).filter(
    pl.col("b3.q2").is_not_null()
).to_series()

list_to_md(qddiyes, "If _yes_, why?")
qddino = df.filter(
    pl.col("b3.q1") == "No"
).select(
    pl.col("b3.q2")
).filter(
    pl.col("b3.q2").is_not_null()
).to_series()

list_to_md(qddino, "If _no_, why?")
qddiimprov = df.select(
    pl.col("b3.q3")
).filter(pl.col("b3.q3").is_not_null()).to_series()

list_to_md(qddiimprov, "DDI improvements for questionnaire")
training = df.select("b4.q1").filter(pl.col("b4.q1").is_not_null()).group_by("b4.q1").len()

mo.ui.table(training)
list_to_md(
    df.select("b4.q2").filter(pl.col("b4.q2").is_not_null()).to_series(),
    "What training?"
)
_chart = (
    alt.Chart(df.filter(pl.col("b4.q3").is_not_null()))
    .mark_bar()
    .encode(
        y=alt.Y("b4\.q3", type="nominal"),
        x=alt.X("count()", type="quantitative"),
    )
    .properties(width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b4.q4").is_not_null()))
    .transform_aggregate(count="count()", groupby=["b4\.q4"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b4\.q4", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b4\.q4", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Top 10 b4\.q4", width="container")
)
_chart
_chart = (
    alt.Chart(df.filter(pl.col("b4.q5").is_not_null()))
    .transform_aggregate(count="count()", groupby=["b4\.q5"])
    .transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("b4\.q5", order="ascending"),
        ],
    )
    .transform_filter(alt.datum.rank <= 10)
    .mark_bar()
    .encode(
        y=alt.Y("b4\.q5", type="nominal", sort="-x"),
        x=alt.X("count", type="quantitative"),
    )
    .properties(title="Top 10 b4\.q5", width="container")
)
_chart