diff --git a/neuropathy_preprocessing.ipynb b/neuropathy_preprocessing.ipynb index 7ffe3db..242899e 100644 --- a/neuropathy_preprocessing.ipynb +++ b/neuropathy_preprocessing.ipynb @@ -55,13 +55,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Lm5wc13ZTKPj", - "outputId": "c80cdef9-e877-4913-9fd2-60e3a4b005c0" + "outputId": "f7ed2614-caa6-43fd-99ca-17e6ef55645f" }, "outputs": [ { @@ -69,34 +69,34 @@ "name": "stdout", "text": [ "Requirement already satisfied: wfdb in /usr/local/lib/python3.7/dist-packages (3.4.1)\n", - "Requirement already satisfied: pandas>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from wfdb) (1.3.5)\n", - "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from wfdb) (1.4.1)\n", - "Requirement already satisfied: numpy>=1.10.1 in /usr/local/lib/python3.7/dist-packages (from wfdb) (1.21.6)\n", "Requirement already satisfied: matplotlib>=3.3.4 in /usr/local/lib/python3.7/dist-packages (from wfdb) (3.5.2)\n", + "Requirement already satisfied: numpy>=1.10.1 in /usr/local/lib/python3.7/dist-packages (from wfdb) (1.21.6)\n", "Requirement already satisfied: requests>=2.8.1 in /usr/local/lib/python3.7/dist-packages (from wfdb) (2.23.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (4.33.3)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (21.3)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (2.8.2)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (0.11.0)\n", + "Requirement already satisfied: pandas>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from wfdb) (1.3.5)\n", + "Requirement already 
satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from wfdb) (1.4.1)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (7.1.2)\n", "Requirement already satisfied: pyparsing>=2.2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (2.8.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (21.3)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (4.33.3)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (1.4.2)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.3.4->wfdb) (0.11.0)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib>=3.3.4->wfdb) (4.2.0)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.17.0->wfdb) (2022.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7->matplotlib>=3.3.4->wfdb) (1.15.0)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.8.1->wfdb) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.8.1->wfdb) (1.24.3)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.8.1->wfdb) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from 
requests>=2.8.1->wfdb) (2021.10.8)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.8.1->wfdb) (1.24.3)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (3.5.2)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (1.4.2)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (1.21.6)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (21.3)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (7.1.2)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (4.33.3)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (0.11.0)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (1.21.6)\n", "Requirement already satisfied: pyparsing>=2.2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (2.8.2)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (4.33.3)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib) (7.1.2)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib) 
(1.4.2)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib) (4.2.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7->matplotlib) (1.15.0)\n" ] @@ -134,18 +134,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rnRS891LX1NQ", - "outputId": "9f2dcca5-3579-488d-e4ec-9c4364d5c517" + "outputId": "33798faf-c8fd-4c2b-e2f2-ea11f9655664" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Mounted at /content/ecg_data\n", "/content/ecg_data/MyDrive/ecg_ai\n" @@ -235,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "yRanatQY3q2o" }, @@ -669,13 +669,561 @@ " " ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "N0XJY5aM01hH" + }, + "source": [ + "## 3. 
# --- 3. Repeat for randomsplit samples ---------------------------------------
# Preprocess the random-split copy of the WFDB records: strip the year from
# every header file, then render 10-second windows of both ECG channels to PNG.

ecg_root = '/content/ecg_data/MyDrive/ecg_ai/ECG_DDA/ecg_wfdb_randomsplit/'


def _find_files(root, extensions):
    """Return the paths of all files under `root` whose name ends with `extensions`.

    Parameters
    ----------
    root : str
        Directory that is walked recursively with `os.walk`.
    extensions : str or tuple of str
        Suffix (or suffixes) passed straight to `str.endswith`.

    Returns
    -------
    list of str
        Joined `dirpath/filename` paths, in `os.walk` order. Empty when `root`
        does not exist or contains no matching file.
    """
    return [
        os.path.join(dirpath, fname)
        for dirpath, _dirnames, fnames in os.walk(root)
        for fname in fnames
        if fname.endswith(extensions)
    ]


def _record_name(path):
    """Return `path` without its final extension.

    `os.path.splitext` only removes the last extension of the last path
    component, so a dot in a directory name or in the file stem no longer
    truncates the record name (which `path.split('.')[0]` did).
    """
    return os.path.splitext(path)[0]


def _window_bounds(n_samples, window, max_windows):
    """Return `(start, stop)` sample indices of the complete windows to plot.

    Only full-length windows are returned, and at most `max_windows` of them.
    A trailing partial window is dropped; a recording whose length is an exact
    multiple of `window` keeps its last window.
    """
    n_windows = min(max_windows, n_samples // window)
    return [(k * window, (k + 1) * window) for k in range(n_windows)]


def plot_ecg_windows(record_names, channel, ylabel, tag, start_index=0, max_windows=200):
    """Save one PNG per 10-second window of one signal column, for every record.

    Parameters
    ----------
    record_names : list of str
        Record paths without extension, as accepted by `wfdb.rdrecord`.
    channel : int
        Column of `record.p_signal` to plot (0 for signal 1, 1 for signal 2).
    ylabel : str
        Y-axis label drawn on every image.
    tag : str
        Inserted into the output filename as `<record>_<tag>_<n>.png`.
    start_index : int, default 0
        Index in `record_names` to start from. Leave at 0 for a full run; set it
        to the last printed index to resume after a crash. Printed indices are
        absolute positions in `record_names`, so they can be reused directly.
    max_windows : int, default 200
        Upper bound on the number of images written per record.

    Notes
    -----
    Images are written next to the record path with 'ecg_wfdb' replaced by
    'ecg_img'; the target directory is created when missing. The x-axis keeps
    the absolute sample index because a pandas Series slice is plotted.
    """
    for index, filename in enumerate(record_names[start_index:], start=start_index):
        clear_output(wait=True)
        print('index: ', index)
        print(filename)

        # load a record using the 'rdrecord' function
        record = wfdb.rdrecord(filename)
        sig1 = pd.DataFrame(record.p_signal)

        # record time
        print('How many 10 second measurement intervals:', sig1.shape[0] / record.fs / 10)

        # Samples per 10-second window; coerced to int so it is a valid slice
        # bound even when the sampling frequency is stored as a float.
        ten = int(round(record.fs * 10))
        bounds = _window_bounds(sig1.shape[0], ten, max_windows)
        print(len(bounds))

        out_prefix = filename.replace('ecg_wfdb', 'ecg_img') + '_' + tag + '_'
        os.makedirs(os.path.dirname(out_prefix), exist_ok=True)

        for x, (start, stop) in enumerate(bounds, start=1):
            print(x)
            fig, ax = plt.subplots(figsize=(15, 2.5), dpi=100)
            ax.plot(sig1.iloc[start:stop, channel])
            ax.set_ylim(-350, 750)
            ax.set_ylabel(ylabel, rotation=0)
            fig.savefig(out_prefix + str(x) + '.png', bbox_inches='tight', dpi=100)
            # Close explicitly: hundreds of figures are created per record.
            plt.close(fig)


# Apply function to all .hea files:
rand_hea_list = _find_files(ecg_root, '.hea')

# Remove filename endings and feed to function:
rand_hea_list_2 = [_record_name(path) for path in rand_hea_list]

# remove_year_from_hea is defined outside this section (not shown here); it is
# called once per record name, i.e. once per header path without its extension.
for files in rand_hea_list_2:
    remove_year_from_hea(files)

# Every file that belongs to a record (.hea header and .dat signal file) ...
rand_lst = _find_files(ecg_root, ('.hea', '.dat'))

# ... reduced to record names; a record with both files appears twice here.
rand_lst2 = [_record_name(path) for path in rand_lst]

# Get unique list, sorted to keep indices reproducible in case of crashes.
# Only names ending in 'ECG' are kept, which removes image files (in case some
# splitting to images has been already done).
rand_lst3 = sorted(name for name in set(rand_lst2) if name.endswith('ECG'))

# Plot first 200 10 second ECGs for all individuals, both ECG signals

### Signal 1 ###
plot_ecg_windows(rand_lst3, channel=0, ylabel='V1/V2 ', tag='signal1')

### Signal 2 ###
plot_ecg_windows(rand_lst3, channel=1, ylabel='V5/V6 ', tag='signal2')

# --- 4. Next up: Training a model --------------------------------------------
# Model training is done in the next notebook:
# https://colab.research.google.com/drive/1_b-j3hDzYTYbbqGdyUnoaGiPZyRMbcJE?usp=sharing