From 46bcb44850380d3275c5c37f2b8f876fd4056074 Mon Sep 17 00:00:00 2001 From: t03i Date: Wed, 29 Jan 2025 18:33:21 +0100 Subject: [PATCH] feat: Add GO word cloud on detail page * style: fix page alignment * feat: add go term retrieval * feat: add go term word cloud * feat: add wordcloud display * style: add text-styling to svg * refactor: prevent single letter words * refactor: improve text * style: better font scaling * feat: add GO loading state --- frontend/package.json | 4 + frontend/pnpm-lock.yaml | 251 ++++++++++++++++++ .../components/GoOverview/GoLoading.svelte | 36 +++ .../lib/components/GoOverview/GoView.svelte | 76 ++++++ .../lib/components/GoOverview/WordCloud.ts | 75 ++++++ .../src/lib/components/GoOverview/goTerms.ts | 62 +++++ .../src/lib/components/GoOverview/index.ts | 5 + .../ProteinDetail/ProteinDetailLoading.svelte | 2 +- frontend/src/lib/external/uniprot.ts | 98 +++++-- .../src/routes/detail/[slug]/+page.svelte | 27 +- 10 files changed, 616 insertions(+), 20 deletions(-) create mode 100644 frontend/src/lib/components/GoOverview/GoLoading.svelte create mode 100644 frontend/src/lib/components/GoOverview/GoView.svelte create mode 100644 frontend/src/lib/components/GoOverview/WordCloud.ts create mode 100644 frontend/src/lib/components/GoOverview/goTerms.ts create mode 100644 frontend/src/lib/components/GoOverview/index.ts diff --git a/frontend/package.json b/frontend/package.json index fbd3849..a6a0452 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -22,6 +22,8 @@ "@tailwindcss/forms": "^0.5.9", "@tailwindcss/typography": "^0.5.15", "@tanstack/svelte-query-devtools": "^5.61.5", + "@types/d3": "^7.4.3", + "@types/d3-cloud": "^1.2.9", "@types/node": "^22.10.1", "@vincjo/datatables": "^1.14.10", "autoprefixer": "^10.4.20", @@ -53,6 +55,8 @@ "@sentry/svelte": "^8.41.0", "@tanstack/svelte-query": "^5.61.5", "axios": "^1.7.8", + "d3": "^7.9.0", + "d3-cloud": "^1.2.7", "iconify-icon": "^2.1.0", "pdbe-molstar": "^3.3.2" } diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index d51f921..802d1a8 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -53,6 +53,12 @@ importers: axios: specifier: ^1.7.8 version: 1.7.8 + d3: + specifier: ^7.9.0 + version: 7.9.0 + d3-cloud: + specifier: ^1.2.7 + version: 1.2.7 iconify-icon: specifier: ^2.1.0 version: 2.1.0 @@ -90,6 +96,12 @@ importers: '@tanstack/svelte-query-devtools': specifier: ^5.61.5 version: 5.61.5(@tanstack/svelte-query@5.61.5(svelte@4.2.19))(svelte@4.2.19) + '@types/d3': + specifier: ^7.4.3 + version: 7.4.3 + '@types/d3-cloud': + specifier: ^1.2.9 + version: 1.2.9 '@types/node': specifier: ^22.10.1 version: 22.10.1 @@ -926,6 +938,105 @@ packages: '@types/cookie@0.6.0': resolution: {integrity: sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==} + '@types/d3-array@3.2.1': + resolution: {integrity: sha512-Y2Jn2idRrLzUfAKV2LyRImR+y4oa2AntrgID95SHJxuMUrkNXmanDSed71sRNZysveJVt1hLLemQZIady0FpEg==} + + '@types/d3-axis@3.0.6': + resolution: {integrity: sha512-pYeijfZuBd87T0hGn0FO1vQ/cgLk6E1ALJjfkC0oJ8cbwkZl3TpgS8bVBLZN+2jjGgg38epgxb2zmoGtSfvgMw==} + + '@types/d3-brush@3.0.6': + resolution: {integrity: sha512-nH60IZNNxEcrh6L1ZSMNA28rj27ut/2ZmI3r96Zd+1jrZD++zD3LsMIjWlvg4AYrHn/Pqz4CF3veCxGjtbqt7A==} + + '@types/d3-chord@3.0.6': + resolution: {integrity: sha512-LFYWWd8nwfwEmTZG9PfQxd17HbNPksHBiJHaKuY1XeqscXacsS2tyoo6OdRsjf+NQYeB6XrNL3a25E3gH69lcg==} + + '@types/d3-cloud@1.2.9': + resolution: {integrity: sha512-5EWJvnlCrqTThGp8lYHx+DL00sOjx2HTlXH1WRe93k5pfOIhPQaL63NttaKYIbT7bTXp/USiunjNS/N4ipttIQ==} + + '@types/d3-color@3.1.3': + resolution: {integrity: sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==} + + '@types/d3-contour@3.0.6': + resolution: {integrity: sha512-BjzLgXGnCWjUSYGfH1cpdo41/hgdWETu4YxpezoztawmqsvCeep+8QGfiY6YbDvfgHz/DkjeIkkZVJavB4a3rg==} + + '@types/d3-delaunay@6.0.4': + resolution: {integrity: sha512-ZMaSKu4THYCU6sV64Lhg6qjf1orxBthaC161plr5KuPHo3CNm8DTHiLw/5Eq2b6TsNP0W0iJrUOFscY6Q450Hw==} + + '@types/d3-dispatch@3.0.6': + resolution: {integrity: sha512-4fvZhzMeeuBJYZXRXrRIQnvUYfyXwYmLsdiN7XXmVNQKKw1cM8a5WdID0g1hVFZDqT9ZqZEY5pD44p24VS7iZQ==} + + '@types/d3-drag@3.0.7': + resolution: {integrity: sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==} + + '@types/d3-dsv@3.0.7': + resolution: {integrity: sha512-n6QBF9/+XASqcKK6waudgL0pf/S5XHPPI8APyMLLUHd8NqouBGLsU8MgtO7NINGtPBtk9Kko/W4ea0oAspwh9g==} + + '@types/d3-ease@3.0.2': + resolution: {integrity: sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==} + + '@types/d3-fetch@3.0.7': + resolution: {integrity: sha512-fTAfNmxSb9SOWNB9IoG5c8Hg6R+AzUHDRlsXsDZsNp6sxAEOP0tkP3gKkNSO/qmHPoBFTxNrjDprVHDQDvo5aA==} + + '@types/d3-force@3.0.10': + resolution: {integrity: sha512-ZYeSaCF3p73RdOKcjj+swRlZfnYpK1EbaDiYICEEp5Q6sUiqFaFQ9qgoshp5CzIyyb/yD09kD9o2zEltCexlgw==} + + '@types/d3-format@3.0.4': + resolution: {integrity: sha512-fALi2aI6shfg7vM5KiR1wNJnZ7r6UuggVqtDA+xiEdPZQwy/trcQaHnwShLuLdta2rTymCNpxYTiMZX/e09F4g==} + + '@types/d3-geo@3.1.0': + resolution: {integrity: sha512-856sckF0oP/diXtS4jNsiQw/UuK5fQG8l/a9VVLeSouf1/PPbBE1i1W852zVwKwYCBkFJJB7nCFTbk6UMEXBOQ==} + + '@types/d3-hierarchy@3.1.7': + resolution: {integrity: sha512-tJFtNoYBtRtkNysX1Xq4sxtjK8YgoWUNpIiUee0/jHGRwqvzYxkq0hGVbbOGSz+JgFxxRu4K8nb3YpG3CMARtg==} + + '@types/d3-interpolate@3.0.4': + resolution: {integrity: sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==} + + '@types/d3-path@3.1.0': + resolution: {integrity: sha512-P2dlU/q51fkOc/Gfl3Ul9kicV7l+ra934qBFXCFhrZMOL6du1TM0pm1ThYvENukyOn5h9v+yMJ9Fn5JK4QozrQ==} + + '@types/d3-polygon@3.0.2': + resolution: {integrity: sha512-ZuWOtMaHCkN9xoeEMr1ubW2nGWsp4nIql+OPQRstu4ypeZ+zk3YKqQT0CXVe/PYqrKpZAi+J9mTs05TKwjXSRA==} + + '@types/d3-quadtree@3.0.6': + resolution: {integrity: sha512-oUzyO1/Zm6rsxKRHA1vH0NEDG58HrT5icx/azi9MF1TWdtttWl0UIUsjEQBBh+SIkrpd21ZjEv7ptxWys1ncsg==} + + '@types/d3-random@3.0.3': + resolution: {integrity: sha512-Imagg1vJ3y76Y2ea0871wpabqp613+8/r0mCLEBfdtqC7xMSfj9idOnmBYyMoULfHePJyxMAw3nWhJxzc+LFwQ==} + + '@types/d3-scale-chromatic@3.1.0': + resolution: {integrity: sha512-iWMJgwkK7yTRmWqRB5plb1kadXyQ5Sj8V/zYlFGMUBbIPKQScw+Dku9cAAMgJG+z5GYDoMjWGLVOvjghDEFnKQ==} + + '@types/d3-scale@4.0.8': + resolution: {integrity: sha512-gkK1VVTr5iNiYJ7vWDI+yUFFlszhNMtVeneJ6lUTKPjprsvLLI9/tgEGiXJOnlINJA8FyA88gfnQsHbybVZrYQ==} + + '@types/d3-selection@3.0.11': + resolution: {integrity: sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==} + + '@types/d3-shape@3.1.7': + resolution: {integrity: sha512-VLvUQ33C+3J+8p+Daf+nYSOsjB4GXp19/S/aGo60m9h1v6XaxjiT82lKVWJCfzhtuZ3yD7i/TPeC/fuKLLOSmg==} + + '@types/d3-time-format@4.0.3': + resolution: {integrity: sha512-5xg9rC+wWL8kdDj153qZcsJ0FWiFt0J5RB6LYUNZjwSnesfblqrI/bJ1wBdJ8OQfncgbJG5+2F+qfqnqyzYxyg==} + + '@types/d3-time@3.0.4': + resolution: {integrity: sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==} + + '@types/d3-timer@3.0.2': + resolution: {integrity: sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==} + + '@types/d3-transition@3.0.9': + resolution: {integrity: sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==} + + '@types/d3-zoom@3.0.8': + resolution: {integrity: sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==} + + '@types/d3@3.5.53': + resolution: {integrity: sha512-8yKQA9cAS6+wGsJpBysmnhlaaxlN42Qizqkw+h2nILSlS+MAG2z4JdO6p+PJrJ+ACvimkmLJL281h157e52psQ==} + + '@types/d3@7.4.3': + resolution: {integrity: sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww==} + '@types/debug@4.1.12': resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} @@ -941,6 +1052,9 @@ packages: '@types/express@4.17.21': resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==} + '@types/geojson@7946.0.16': + resolution: {integrity: sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==} + '@types/hast@3.0.4': resolution: {integrity: sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==} @@ -1335,6 +1449,9 @@ packages: resolution: {integrity: sha512-VE5S6TNa+j8msksl7HwjxMHDM2yNK3XCkusIlpX5kwauBfXuyLAtNg9jCp/iHH61tgI4sb6R/EIMWCqEIdjT/g==} engines: {node: '>=12'} + d3-cloud@1.2.7: + resolution: {integrity: sha512-8TrgcgwRIpoZYQp7s3fGB7tATWfhckRb8KcVd1bOgqkNdkJRDGWfdSf4HkHHzZxSczwQJdSxvfPudwir5IAJ3w==} + d3-color@3.1.0: resolution: {integrity: sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==} engines: {node: '>=12'} @@ -1347,6 +1464,9 @@ packages: resolution: {integrity: sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==} engines: {node: '>=12'} + d3-dispatch@1.0.6: + resolution: {integrity: sha512-fVjoElzjhCEy+Hbn8KygnmMS7Or0a9sI2UzGwoB7cCtvI1XpVN9GpoYlnb3xt2YV66oXYb1fLJ8GMvP4hdU1RA==} + d3-dispatch@3.0.1: resolution: {integrity: sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==} engines: {node: '>=12'} @@ -3941,6 +4061,129 @@ snapshots: '@types/cookie@0.6.0': {} + '@types/d3-array@3.2.1': {} + + '@types/d3-axis@3.0.6': + dependencies: + '@types/d3-selection': 3.0.11 + + '@types/d3-brush@3.0.6': + dependencies: + '@types/d3-selection': 3.0.11 + + '@types/d3-chord@3.0.6': {} + + '@types/d3-cloud@1.2.9': + dependencies: + '@types/d3': 3.5.53 + + '@types/d3-color@3.1.3': {} + + '@types/d3-contour@3.0.6': + dependencies: + '@types/d3-array': 3.2.1 + '@types/geojson': 7946.0.16 + + '@types/d3-delaunay@6.0.4': {} + + '@types/d3-dispatch@3.0.6': {} + + '@types/d3-drag@3.0.7': + dependencies: + '@types/d3-selection': 3.0.11 + + '@types/d3-dsv@3.0.7': {} + + '@types/d3-ease@3.0.2': {} + + '@types/d3-fetch@3.0.7': + dependencies: + '@types/d3-dsv': 3.0.7 + + '@types/d3-force@3.0.10': {} + + '@types/d3-format@3.0.4': {} + + '@types/d3-geo@3.1.0': + dependencies: + '@types/geojson': 7946.0.16 + + '@types/d3-hierarchy@3.1.7': {} + + '@types/d3-interpolate@3.0.4': + dependencies: + '@types/d3-color': 3.1.3 + + '@types/d3-path@3.1.0': {} + + '@types/d3-polygon@3.0.2': {} + + '@types/d3-quadtree@3.0.6': {} + + '@types/d3-random@3.0.3': {} + + '@types/d3-scale-chromatic@3.1.0': {} + + '@types/d3-scale@4.0.8': + dependencies: + '@types/d3-time': 3.0.4 + + '@types/d3-selection@3.0.11': {} + + '@types/d3-shape@3.1.7': + dependencies: + '@types/d3-path': 3.1.0 + + '@types/d3-time-format@4.0.3': {} + + '@types/d3-time@3.0.4': {} + + '@types/d3-timer@3.0.2': {} + + '@types/d3-transition@3.0.9': + dependencies: + '@types/d3-selection': 3.0.11 + + '@types/d3-zoom@3.0.8': + dependencies: + '@types/d3-interpolate': 3.0.4 + '@types/d3-selection': 3.0.11 + + '@types/d3@3.5.53': {} + + '@types/d3@7.4.3': + dependencies: + '@types/d3-array': 3.2.1 + '@types/d3-axis': 3.0.6 + '@types/d3-brush': 3.0.6 + '@types/d3-chord': 3.0.6 + '@types/d3-color': 3.1.3 + '@types/d3-contour': 3.0.6 + '@types/d3-delaunay': 6.0.4 + '@types/d3-dispatch': 3.0.6 + '@types/d3-drag': 3.0.7 + '@types/d3-dsv': 3.0.7 + '@types/d3-ease': 3.0.2 + '@types/d3-fetch': 3.0.7 + '@types/d3-force': 3.0.10 + '@types/d3-format': 3.0.4 + '@types/d3-geo': 3.1.0 + '@types/d3-hierarchy': 3.1.7 + '@types/d3-interpolate': 3.0.4 + '@types/d3-path': 3.1.0 + '@types/d3-polygon': 3.0.2 + '@types/d3-quadtree': 3.0.6 + '@types/d3-random': 3.0.3 + '@types/d3-scale': 4.0.8 + '@types/d3-scale-chromatic': 3.1.0 + '@types/d3-selection': 3.0.11 + '@types/d3-shape': 3.1.7 + '@types/d3-time': 3.0.4 + '@types/d3-time-format': 4.0.3 + '@types/d3-timer': 3.0.2 + '@types/d3-transition': 3.0.9 + '@types/d3-zoom': 3.0.8 + '@types/debug@4.1.12': dependencies: '@types/ms': 0.7.34 @@ -3965,6 +4208,8 @@ snapshots: '@types/qs': 6.9.17 '@types/serve-static': 1.15.7 + '@types/geojson@7946.0.16': {} + '@types/hast@3.0.4': dependencies: '@types/unist': 3.0.3 @@ -4430,6 +4675,10 @@ snapshots: dependencies: d3-path: 3.1.0 + d3-cloud@1.2.7: + dependencies: + d3-dispatch: 1.0.6 + d3-color@3.1.0: {} d3-contour@4.0.2: @@ -4440,6 +4689,8 @@ snapshots: dependencies: delaunator: 5.0.1 + d3-dispatch@1.0.6: {} + d3-dispatch@3.0.1: {} d3-drag@3.0.0: diff --git a/frontend/src/lib/components/GoOverview/GoLoading.svelte b/frontend/src/lib/components/GoOverview/GoLoading.svelte new file mode 100644 index 0000000..18df494 --- /dev/null +++ b/frontend/src/lib/components/GoOverview/GoLoading.svelte @@ -0,0 +1,36 @@ + + + + +
+
+
+
+
+
+
+ {#each Array(8) as _, i} +
+ {/each} +
+
+
+
diff --git a/frontend/src/lib/components/GoOverview/GoView.svelte b/frontend/src/lib/components/GoOverview/GoView.svelte new file mode 100644 index 0000000..a546a8e --- /dev/null +++ b/frontend/src/lib/components/GoOverview/GoView.svelte @@ -0,0 +1,76 @@ + + + +
+ + View all terms in QuickGO + + +
+ + + + + + + Summary of {goAnnotations.length} GO terms + + + This word cloud visualizes the top {maxWordCount} distinct words found + in all Gene Ontology (GO) terms associated with this protein. Larger words + indicate words that appear more frequently in the annotations. Go Annotations + are retrieved from UniProt. + + + +
+ +
+
+
diff --git a/frontend/src/lib/components/GoOverview/WordCloud.ts b/frontend/src/lib/components/GoOverview/WordCloud.ts new file mode 100644 index 0000000..784a6a1 --- /dev/null +++ b/frontend/src/lib/components/GoOverview/WordCloud.ts @@ -0,0 +1,75 @@ +// Copyright 2025 Tobias Olenyi. +// SPDX-License-Identifier: Apache-2.0 + +import * as d3 from "d3"; +import cloud, { type Word } from "d3-cloud"; + +export interface WordCloudOptions { + width?: number; + height?: number; + word?: (d: T) => string; + size?: (d: T) => number; + rotate?: (d: T) => number; + fontScale?: number; + padding?: number; + marginTop?: number; + marginRight?: number; + marginBottom?: number; + marginLeft?: number; + fontFamily?: string; +} + +export function WordCloud( + words: Array, + svg: SVGElement, + options: WordCloudOptions = {}, +) { + const { + width = 16, + height = 9, + marginTop = 0, + marginRight = 0, + marginBottom = 0, + marginLeft = 0, + fontScale = 10, + padding = 0, + rotate = 0, + } = options; + + const svgContainer = d3 + .select(svg) + .attr("viewBox", [0, 0, width, height]) + .attr("text-anchor", "middle"); + + const g = svgContainer + .append("g") + .attr("transform", `translate(${marginLeft},${marginTop})`); + + const wordCloud = cloud() + .size([width - marginLeft - marginRight, height - marginTop - marginBottom]) + .words(words) + .padding(options.padding ?? 0) + .rotate((d) => options.rotate?.(d as T) ?? 0) + .font(options.fontFamily ?? "sans-serif") + .text((d) => options.word?.(d as T) ?? "") + .fontSize((d) => (options.size?.(d as T) ?? 1) * (options.fontScale ?? 30)) + .on("word", (word: T) => { + g.append("text") + .datum(word.text) + .attr("font-size", `${word.size ?? 1}px`) + .classed("text-token", true) + .classed("fill-token", true) + .attr( + "transform", + `translate(${word.x},${word.y}) rotate(${word.rotate})`, + ) + .text(word.text ?? ""); + }); + + wordCloud.start(); + + return () => { + wordCloud.stop(); + d3.select(svg).selectAll("*").remove(); + }; +} diff --git a/frontend/src/lib/components/GoOverview/goTerms.ts b/frontend/src/lib/components/GoOverview/goTerms.ts new file mode 100644 index 0000000..bf8f1a7 --- /dev/null +++ b/frontend/src/lib/components/GoOverview/goTerms.ts @@ -0,0 +1,62 @@ +// Copyright 2025 Tobias Olenyi. +// SPDX-License-Identifier: Apache-2.0 + +import type { GOAnnotation } from "$lib/external/uniprot"; + +const stopwords = new Set( + "i,me,my,myself,we,us,our,ours,ourselves,you,your,yours,yourself,yourselves,he,him,his,himself,she,her,hers,herself,it,its,itself,they,them,their,theirs,themselves,what,which,who,whom,whose,this,that,these,those,am,is,are,was,were,be,been,being,have,has,had,having,do,does,did,doing,will,would,should,can,could,ought,i'm,you're,he's,she's,it's,we're,they're,i've,you've,we've,they've,i'd,you'd,he'd,she'd,we'd,they'd,i'll,you'll,he'll,she'll,we'll,they'll,isn't,aren't,wasn't,weren't,hasn't,haven't,hadn't,doesn't,don't,didn't,won't,wouldn't,shan't,shouldn't,can't,cannot,couldn't,mustn't,let's,that's,who's,what's,here's,there's,when's,where's,why's,how's,a,an,the,and,but,if,or,because,as,until,while,of,at,by,for,with,about,against,between,into,through,during,before,after,above,below,to,from,up,upon,down,in,out,on,off,over,under,again,further,then,once,here,there,when,where,why,how,all,any,both,each,few,more,most,other,some,such,no,nor,not,only,own,same,so,than,too,very,say,says,said,shall".split( + ",", + ), +); + +export function sortGoTerms( + goAnnotations: Array, + maxWordCount: number = 40, +) { + // Flatten and pre-process the terms + const source = goAnnotations.flatMap((annotation) => { + const words = annotation.term.split(/[\s.]+/g); + // Merge single letters with next word + const mergedWords = words.reduce((acc: string[], word, index) => { + if (word.length === 1 && index < words.length - 1) { + acc.push(`${word}${words[index + 1]}`); + words[index + 1] = ""; // Mark next word as processed + } else if (word !== "") { + acc.push(word); + } + return acc; + }, []); + return mergedWords; + }); + + const words = source + .map((w) => w.replace(/^["'"\-—()\[\]{}]+/g, "")) + .map((w) => w.replace(/[;:.!?()\[\]{},"''"\-—]+$/g, "")) + .map((w) => w.replace(/['']s$/g, "")) + .map((w) => w.substring(0, 30)) + .map((w) => w.toLowerCase()) + .filter((w) => w && !stopwords.has(w)); + + // Count word frequencies + const wordCounts = words.reduce( + (acc, word) => { + acc[word] = (acc[word] || 0) + 1; + return acc; + }, + {} as Record, + ); + + const sortedWords = Object.entries(wordCounts) + .sort(([, countA], [, countB]) => (countB as number) - (countA as number)) + .slice(0, maxWordCount) + .map(([word, count]) => ({ + text: word, + value: count, + })); + + return sortedWords; +} + +export function getQuickGOUrl(uniprotAcc: string) { + return `https://www.ebi.ac.uk/QuickGO/annotations?geneProductId=${uniprotAcc}`; +} diff --git a/frontend/src/lib/components/GoOverview/index.ts b/frontend/src/lib/components/GoOverview/index.ts new file mode 100644 index 0000000..4272503 --- /dev/null +++ b/frontend/src/lib/components/GoOverview/index.ts @@ -0,0 +1,5 @@ +// Copyright 2024 Tobias Olenyi. +// SPDX-License-Identifier: Apache-2.0 + +export { default as GoLoading } from "./GoLoading.svelte"; +export { default as GoView } from "./GoView.svelte"; diff --git a/frontend/src/lib/components/ProteinDetail/ProteinDetailLoading.svelte b/frontend/src/lib/components/ProteinDetail/ProteinDetailLoading.svelte index af6f02c..38ceceb 100644 --- a/frontend/src/lib/components/ProteinDetail/ProteinDetailLoading.svelte +++ b/frontend/src/lib/components/ProteinDetail/ProteinDetailLoading.svelte @@ -7,7 +7,7 @@ export { className as class }; -
+
diff --git a/frontend/src/lib/external/uniprot.ts b/frontend/src/lib/external/uniprot.ts index e86c3a8..33dc5c6 100644 --- a/frontend/src/lib/external/uniprot.ts +++ b/frontend/src/lib/external/uniprot.ts @@ -1,10 +1,10 @@ // Copyright 2024 Tobias Olenyi. // SPDX-License-Identifier: Apache-2.0 -import { createQuery, type CreateQueryResult } from '@tanstack/svelte-query'; -import type { QueryFunction } from '@tanstack/svelte-query'; -import axios, { AxiosError } from 'axios'; -import type { AnnotationData, PublicAnnotation } from '$lib/client/model'; +import type { AnnotationData, PublicAnnotation } from "$lib/client/model"; +import type { QueryFunction } from "@tanstack/svelte-query"; +import { createQuery, type CreateQueryResult } from "@tanstack/svelte-query"; +import axios, { AxiosError } from "axios"; export enum UniprotACCType { UNIPROT_ACCESSION = 0, @@ -12,20 +12,38 @@ export enum UniprotACCType { UNKNOWN = -1, } +export enum UniprotGOCategory { + UNKNOWN = 0, + BIOLOGICAL_PROCESS = "P", + MOLECULAR_FUNCTION = "F", + CELLULAR_COMPONENT = "C", +} + +export interface GOAnnotation { + id: string; + term: string; + category: UniprotGOCategory; +} + export const UniprotACCTypeNameMap: Record = { - [UniprotACCType.UNIPROT_ACCESSION]: 'UniProt Accession', - [UniprotACCType.UNIPROT_NAME]: 'UniProt Name', + [UniprotACCType.UNIPROT_ACCESSION]: "UniProt Accession", + [UniprotACCType.UNIPROT_NAME]: "UniProt Name", [UniprotACCType.UNKNOWN]: null, }; export interface UniprotAnnotationData extends AnnotationData { accession: string; name: string; sequence_length: number; + go_annotations?: Array; } export const uniprot_get_input_type = (selected_id: string): UniprotACCType => { const test_str = selected_id.toUpperCase(); - if (/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}$/.test(test_str)) { + if ( + /^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9](?:[A-Z][A-Z0-9]{2}[0-9]){1,2}$/.test( + test_str, + ) + ) { return UniprotACCType.UNIPROT_ACCESSION; } else if (/^[A-Z0-9]{3,20}_[A-Z0-9]{3,20}$/.test(test_str)) { return UniprotACCType.UNIPROT_NAME; @@ -34,13 +52,16 @@ export const uniprot_get_input_type = (selected_id: string): UniprotACCType => { } }; -const uniprot_query_url = (selected_id: string, input_type: UniprotACCType): string => { +const uniprot_query_url = ( + selected_id: string, + input_type: UniprotACCType, +): string => { const query_prefix = { [UniprotACCType.UNIPROT_ACCESSION]: `accession:${selected_id}`, [UniprotACCType.UNIPROT_NAME]: `id:${selected_id}`, [UniprotACCType.UNKNOWN]: selected_id, }; - return `https://rest.uniprot.org/uniprotkb/search?query=${query_prefix[input_type]} AND active:true&fields=id,accession,length,ft_transmem&format=json&size=1`; + return `https://rest.uniprot.org/uniprotkb/search?query=${query_prefix[input_type]} AND active:true&fields=id,accession,length,ft_transmem,go&format=json&size=1`; }; export const uniprot_entry_url = (selected_id: string): string => { @@ -51,7 +72,10 @@ export const uniprot_taxonomy_url = (taxon_id: string): string => { return `https://www.uniprot.org/taxonomy/${taxon_id}`; }; -const uniprot_parse_response = (body: any, selected_id: string): UniprotAnnotationData | null => { +const uniprot_parse_response = ( + body: any, + selected_id: string, +): UniprotAnnotationData | null => { if (body && body.results && body.results.length > 0) { const result = body.results[0]; const annotations: PublicAnnotation[] = []; @@ -66,22 +90,62 @@ const uniprot_parse_response = (body: any, selected_id: string): UniprotAnnotati label, source_db: "uniprot", source_db_url: uniprot_entry_url(selected_id), - date_added: new Date().toISOString().split('T')[0], + date_added: new Date().toISOString().split("T")[0], }); } } + + // Parse GO annotations + const go_annotations = (result.uniProtKBCrossReferences || []) + .filter((go: any) => go.database === "GO") + .map((go: any) => { + const go_term = + go.properties.find((prop: any) => prop.key === "GoTerm")?.value || ""; + const go_split = go_term.split(":"); + const category = go_split[0]?.charAt(0) || ""; + + let goCategory: UniprotGOCategory; + switch (category) { + case "P": + goCategory = UniprotGOCategory.BIOLOGICAL_PROCESS; + break; + case "F": + goCategory = UniprotGOCategory.MOLECULAR_FUNCTION; + break; + case "C": + goCategory = UniprotGOCategory.CELLULAR_COMPONENT; + break; + default: + goCategory = UniprotGOCategory.UNKNOWN; + } + + return { + id: go.id, + term: go_split[1] || go.id, + category: goCategory, + }; + }) + .filter(Boolean); + return { accession: result.primaryAccession, name: result.uniProtkbId, sequence_length: result.sequence.length, annotations: annotations, + go_annotations: go_annotations, }; } return null; }; -export const createGetUniprotAnnotation = (selected_id: string): CreateQueryResult => { - const queryFn: QueryFunction = async ({ signal }) => { +export const createGetUniprotAnnotation = ( + selected_id: string, +): CreateQueryResult => { + const queryFn: QueryFunction< + UniprotAnnotationData | null, + [string, string], + AxiosError + > = async ({ signal }) => { try { const input_type = uniprot_get_input_type(selected_id); const url = uniprot_query_url(selected_id, input_type); @@ -92,18 +156,18 @@ export const createGetUniprotAnnotation = (selected_id: string): CreateQueryResu throw error; } else { throw new AxiosError( - 'An unexpected error occurred while fetching UniProt data', - 'UNKNOWN_ERROR', + "An unexpected error occurred while fetching UniProt data", + "UNKNOWN_ERROR", + undefined, undefined, undefined, - undefined ); } } }; return createQuery({ - queryKey: ['uniprotAnnotation', selected_id], + queryKey: ["uniprotAnnotation", selected_id], queryFn, }); }; diff --git a/frontend/src/routes/detail/[slug]/+page.svelte b/frontend/src/routes/detail/[slug]/+page.svelte index fe059b5..fe2eda4 100644 --- a/frontend/src/routes/detail/[slug]/+page.svelte +++ b/frontend/src/routes/detail/[slug]/+page.svelte @@ -19,6 +19,8 @@ ProteinDetailError, } from "$lib/components/ProteinDetail"; + import { GoView, GoLoading } from "$lib/components/GoOverview"; + import { DBReferencesView, DBReferencesLoading, @@ -83,6 +85,7 @@ annotationStructureSelection, annotationDBReferences, annotationTracks, + uniprotQuery, } = createAnnotationStore(uniprotAcc, infoQuery); onMount(() => { @@ -115,8 +118,11 @@ {config.APP_NAME} - {uniprotAcc} -
-
+
+
{#if $structureQuery?.isLoading}
@@ -162,6 +168,7 @@ {/if}
+

Annotations

{#if $annotationsIsFetching} @@ -174,4 +181,20 @@ /> {/if}
+ +
+

GO Term Overview

+ {#if $uniprotQuery?.isFetching} + + {:else if $uniprotQuery?.error} +
+ Failed to load GO terms: {$uniprotQuery.error.message} +
+ {:else if $uniprotQuery?.data?.go_annotations} + + {/if} +