[analysis] add with resulting plots

Stunkymonkey · Sep 15, 2021 · 481a395 · 481a395
1 parent 8178790
commit 481a395
Show file tree

Hide file tree

Showing 10 changed files with 1,454 additions and 206 deletions.
diff --git a/analysis/graph-details.ipynb b/analysis/graph-details.ipynb
@@ -35,12 +35,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "EVAL_DIR = \"/home/felix/todo/osm-tmp\"\n",
+    "OUTPUT = \"single\"\n",
+    "EVAL_DIR = \"/home/felix/todo/algohol/single\"\n",
     "MLP_METHODS = [\"kmeans\", \"gonzalez\", \"merge\"]\n",
-    "MLP_LEVELS = [[int(2 ** i)] for i in np.arange(8.0, 13.5, 1.0)]\n",
+    "MLP_LEVELS = [[int(2 ** i)] for i in np.arange(9.0, 12.5, 1.0)]\n",
     "FAST_QUERY_METHODS = [\"pcrp\", \"pch\", \"prp\"]\n",
-    "QUERY_METHODS = [\"normal\", \"bi\"] + FAST_QUERY_METHODS\n",
-    "AREAS = [\"saarland\"]\n",
+    "QUERY_METHODS = [\"normal\"] + FAST_QUERY_METHODS\n",
+    "AREAS = [\"baden-wuerttemberg\"]\n",
     "print(MLP_METHODS, \"with\", MLP_LEVELS)"
    ]
   },
@@ -80,46 +81,41 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "assert((df_graph.groupby([\"amount_edges\"]).size() == 5).all())"
+    "assert((df_graph.groupby([\"amount_edges\"]).size() == len(QUERY_METHODS)).all())"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "amber-cycle",
+   "id": "unauthorized-contest",
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig, ax = plt.subplots()\n",
-    "speedups = list()\n",
-    "for area in AREAS:\n",
-    "    dijkstra = df_graph[(df_graph.Query == \"normal\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
-    "    bidijkstra = df_graph[(df_graph.Query == \"bi\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
-    "    for query in FAST_QUERY_METHODS:\n",
-    "        for mlp in MLP_METHODS:\n",
-    "            x = list()\n",
-    "            y = list()\n",
-    "            for partitions in MLP_LEVELS:\n",
-    "                tmp = df_graph[(df_graph.Area == area) & (df_graph.Query == query) & (df_graph.MLP_method == mlp) & (df_graph.Levels == \"_\".join(map(str, partitions)))]\n",
-    "                x.append(partitions[0])\n",
-    "                y.append(tmp[\"amount_used_edges\"])\n",
-    "#                 speedups.append({\"Query\": query, \"MLP\": mlp, \"_\".join(map(str, partitions)): dijkstra / tmp[\"time\"].mean()})\n",
-    "            plt.plot(x, y, marker=plot_get(query), color=plot_get(mlp), label=query + \"-\" + mlp, alpha=0.7)\n",
-    "plt.xlabel(\"MLP-Partition-Size\")\n",
-    "plt.ylabel(\"edges per algorithm\")\n",
-    "ax.set_yscale('log')\n",
-    "plt.legend(loc='upper left')\n",
-    "fig.savefig(\"used-edges-single-level.pgf\", bbox_inches=\"tight\")"
+    "df_graph.groupby([\"MLP_method\", \"Levels\", \"Query\"])[\"amount_used_edges\"].first()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "unauthorized-contest",
+   "id": "humanitarian-armenia",
    "metadata": {},
    "outputs": [],
    "source": [
-    "df_graph.groupby([\"MLP_method\", \"Levels\", \"Query\"])[\"amount_used_edges\"].first()"
+    "def format_tex(float_number):\n",
+    "#     exponent = np.floor(np.log10(float_number))\n",
+    "    exponent = 6\n",
+    "    mantissa = float_number/10**exponent\n",
+    "    return \"${:0.1f}\\\\times10^{{{:}}}$\".format(float(mantissa), str(int(exponent)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "handy-preservation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_graph[\"amount_used_edges\"] = pd.to_numeric(df_graph[\"amount_used_edges\"], downcast=\"float\")"
    ]
   },
   {
@@ -133,16 +129,15 @@
     "for area in AREAS:\n",
     "    dijkstra = df_graph[(df_graph.Query == \"normal\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
     "    print(\"original edge amount:\", dijkstra)\n",
-    "    bidijkstra = df_graph[(df_graph.Query == \"bi\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
     "    for query in FAST_QUERY_METHODS:\n",
     "        for mlp in MLP_METHODS:\n",
     "            line = dict()\n",
     "            for partitions in MLP_LEVELS:\n",
     "                tmp = df_graph[(df_graph.Area == area) & (df_graph.Query == query) & (df_graph.MLP_method == mlp) & (df_graph.Levels == \"_\".join(map(str, partitions)))]\n",
     "                line[partitions[0]] = tmp[\"amount_used_edges\"].values[0]\n",
     "            df_new = pd.DataFrame([line])\n",
-    "            df_new[\"MLP_method\"] = mlp\n",
-    "            df_new[\"Query\"] = query\n",
+    "            df_new[\"MLP_method\"] = mlp_title(mlp)\n",
+    "            df_new[\"Query\"] = query.upper()\n",
     "            df_table = pd.concat([df_table, df_new], ignore_index=True)"
    ]
   },
@@ -154,7 +149,8 @@
    "outputs": [],
    "source": [
     "df_edges = df_table.groupby([\"Query\", \"MLP_method\"]).first()\n",
-    "latex = df_edges.to_latex(float_format=\"{:0.1f}\".format)\n",
+    "# latex = df_edges.to_latex(float_format=\"{:0.1f}\".format)\n",
+    "latex = df_edges.to_latex(float_format=format_tex, escape=False)\n",
     "df_edges"
    ]
   },
@@ -180,16 +176,162 @@
     "latex_list.insert(len(latex_list)-8, '\\midrule')\n",
     "latex_list.insert(len(latex_list)-5, '\\midrule')\n",
     "latex_new = '\\n'.join(latex_list)\n",
-    "with open(\"edges.tex\", \"w\") as latex_file:\n",
+    "latex_new = latex_new.replace(\"MLP_method\", \"MLP-method\")\n",
+    "with open(OUTPUT + \"-edges.tex\", \"w\") as latex_file:\n",
     "    latex_file.writelines(latex_new)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "approximate-belgium",
+   "metadata": {},
+   "source": [
+    "# level"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "funny-tennis",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "OUTPUT = \"level\"\n",
+    "EVAL_DIR = \"/home/felix/todo/algohol/level\"\n",
+    "MLP_METHODS = [\"merge\"]\n",
+    "MLP_LEVELS = [[int(2 ** i)] for i in np.arange(9.0, 11.5, 1.0)]\n",
+    "MLP_LEVELS = MLP_LEVELS + [[int(2 ** i), 4] for i in np.arange(9.0, 11.5, 1.0)]\n",
+    "MLP_LEVELS = MLP_LEVELS + [[int(2 ** i), 16] for i in np.arange(9.0, 11.5, 1.0)]\n",
+    "MLP_LEVELS = MLP_LEVELS + [[int(2 ** i), 4, 4] for i in np.arange(9.0, 11.5, 1.0)]\n",
+    "FAST_QUERY_METHODS = [\"pcrp\", \"pch\", \"prp\"]\n",
+    "QUERY_METHODS = [\"normal\"] + FAST_QUERY_METHODS\n",
+    "AREAS = [\"baden-wuerttemberg\"]\n",
+    "print(MLP_METHODS, \"with\", MLP_LEVELS)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bottom-bottom",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_graph = pd.DataFrame()\n",
+    "for area in AREAS:\n",
+    "    for mlp_method in MLP_METHODS:\n",
+    "        for level in MLP_LEVELS:\n",
+    "            for query in QUERY_METHODS:\n",
+    "                df_new = pd.read_json(EVAL_DIR + \"/\" + area + \"-\" + mlp_method + \"-\" +  \"_\".join(map(str, level)) + \"-\" + query + \"-info.json\", typ='series')\n",
+    "                df_new = pd.DataFrame([df_new])\n",
+    "                df_new[\"Area\"] = area\n",
+    "                df_new[\"MLP_method\"] = mlp_method\n",
+    "                df_new[\"Levels\"] = \"_\".join(map(str, level))\n",
+    "                df_new[\"Query\"] = query\n",
+    "                df_graph = pd.concat([df_graph, df_new], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "champion-spray",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert((df_graph.groupby([\"amount_edges\"]).size() == len(QUERY_METHODS)).all())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "popular-birth",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_graph[\"amount_used_edges\"] = pd.to_numeric(df_graph[\"amount_used_edges\"], downcast=\"float\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "accessible-courage",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "df_table = pd.DataFrame()\n",
+    "for area in AREAS:\n",
+    "    dijkstra = df_graph[(df_graph.Query == \"normal\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
+    "    print(\"original edge amount:\", dijkstra)\n",
+    "    for mlp in MLP_METHODS:\n",
+    "        for partitions in MLP_LEVELS:\n",
+    "            line = dict()\n",
+    "            for query in FAST_QUERY_METHODS:\n",
+    "                tmp = df_graph[(df_graph.Area == area) & (df_graph.Query == query) & (df_graph.MLP_method == mlp) & (df_graph.Levels == \"_\".join(map(str, partitions)))]\n",
+    "                line[query.upper()] = tmp[\"amount_used_edges\"].values[0]\n",
+    "            df_new = pd.DataFrame([line])\n",
+    "            df_new[\"partitions\"] = \"_\".join(map(str, partitions))\n",
+    "            df_table = pd.concat([df_table, df_new], ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "blind-saint",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def special_hacky_sort(x):\n",
+    "    splited = x.str.split(\"-\", expand=True)\n",
+    "    return pd.DataFrame(splited).astype(float).sum(axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "attached-physics",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_table['partitions'] = df_table['partitions'].str.replace('_','-')\n",
+    "df_edges = df_table.groupby(\"partitions\").first()\n",
+    "df_edges = df_edges.sort_values(by=\"partitions\", key=special_hacky_sort)\n",
+    "latex = df_edges.to_latex(float_format=format_tex, escape=False)\n",
+    "df_edges"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "confidential-decade",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fixup ugly latex code to have single line header\n",
+    "latex_list = latex.splitlines()\n",
+    "\n",
+    "latex_list[0] = latex_list[0].replace('lr', 'l|r', 1)\n",
+    "\n",
+    "columns = latex_list[2].split(\"&\")\n",
+    "indices = latex_list[3].split(\"&\")\n",
+    "\n",
+    "latex_list[2] = \" & \\multicolumn{\" + str(len(FAST_QUERY_METHODS)) + \"}{c}{Dijkstra-Query} \\\\\"\n",
+    "\n",
+    "latex_list[3] = \"&\".join(indices[:1] + columns[1:])\n",
+    "\n",
+    "\n",
+    "latex_list.insert(len(latex_list)-10, '\\midrule')\n",
+    "latex_list.insert(len(latex_list)-6, '\\midrule')\n",
+    "latex_new = '\\n'.join(latex_list)\n",
+    "\n",
+    "with open(OUTPUT + \"-edges.tex\", \"w\") as latex_file:\n",
+    "    latex_file.writelines(latex_new)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fresh-nitrogen",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],
@@ -209,7 +351,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.9"
+   "version": "3.8.11"
   }
  },
  "nbformat": 4,

diff --git a/analysis/helper.py b/analysis/helper.py
@@ -14,22 +14,43 @@
 markers = ['s', 'X', '*', '+', 'o', '^']
 
 identifiert = dict()
+mapping_colors = dict()
 
 for method, color in zip(MLP_METHODS, colors):
     identifiert[method] = color
 
 for method, marker in zip(QUERY_METHODS, markers):
     identifiert[method] = marker
 
+mapping_colors["pch"] = "kmeans"
+mapping_colors["pcrp"] = "gonzalez"
+mapping_colors["prp"] = "merge"
+
+TEXT_WIDTH = 426.0
+
 
 def ns_to_ms(value):
     return value / 1e6
 
 
+def sec_to_min(value):
+    return value / 60
+
+
 def plot_get(method):
     return identifiert[method]
 
 
+def plot_color_get(method):
+    return identifiert[mapping_colors[method]]
+
+
+def mlp_title(method):
+    if method == "kmeans":
+        return "K-means"
+    return method.title()
+
+
 def shell_execute(command, EVAL_DIR):
     start_time = time.time()
     result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
@@ -75,6 +96,38 @@ def not_created_yet(file, EVAL_DIR):
         return False
 
 
+def set_pgf_size(width_pt, fraction=1, subplots=(1, 1)):
+    """Set figure dimensions to sit nicely in our document.
+
+    Parameters
+    ----------
+    width_pt: float
+            Document width in points
+    fraction: float, optional
+            Fraction of the width which you wish the figure to occupy
+    subplots: array-like, optional
+            The number of rows and columns of subplots.
+    Returns
+    -------
+    fig_dim: tuple
+            Dimensions of figure in inches
+    """
+    # Width of figure (in pts)
+    fig_width_pt = width_pt * fraction
+    # Convert from pt to inches
+    inches_per_pt = 1 / 72.27
+
+    # Golden ratio to set aesthetic figure height
+    golden_ratio = (5**.5 - 1) / 2
+
+    # Figure width in inches
+    fig_width_in = fig_width_pt * inches_per_pt
+    # Figure height in inches
+    fig_height_in = fig_width_in * golden_ratio * (subplots[0] / subplots[1])
+
+    return (fig_width_in, fig_height_in)
+
+
 def main():
     pass