Skip to content

Commit

Permalink
Merge pull request #3565 from magda-io/seo-improvements
Browse files Browse the repository at this point in the history
#3564: Add rel="canonical" annotations to dataset & distribution page crawler views
  • Loading branch information
t83714 authored Sep 23, 2024
2 parents df315bc + 10bcccd commit cff01be
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Increase indexer client connection idle-timeout to avoid encountering connection reset error for downloading large region files
- Upgraded OpenSearch to v2.16.0
- #3556: Serve robots.txt with content-type `text/plain`, plus other sitemap & crawler view related improvements.
- #3564: Add rel="canonical" annotations to dataset & distribution page crawler views

## v4.2.3

Expand Down
21 changes: 16 additions & 5 deletions magda-web-server/src/crawlerViews/commonView.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import markdownToHtml from "magda-typescript-common/src/markdownToHtml.js";

type ContentType = {
title: string;
interface ContentType {
title?: string;
__content: string;
};
canonicalUrl: string;
sitemapUrl: string;
}

const commonView = (
{ title, __content }: ContentType,
{ title, __content, canonicalUrl, sitemapUrl }: ContentType,
shouldShowFullVersionLink: boolean = false,
fullVersionUrl: string = ""
) => {
Expand All @@ -15,7 +17,16 @@ const commonView = (
<head>
<meta charset="UTF-8">
<title>${title}</title>
<style></style>
${
canonicalUrl
? `<link rel="canonical" href="${canonicalUrl}">`
: ""
}
${
sitemapUrl
? `<link rel="sitemap" type="application/xml" href="${sitemapUrl}">`
: ""
}
</head>
<body>
${markdownToHtml(__content, true)}
Expand Down
35 changes: 30 additions & 5 deletions magda-web-server/src/createCrawlerViewRouter.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Router } from "express";
import getAbsoluteUrl from "magda-typescript-common/src/getAbsoluteUrl.js";
import shouldRenderCrawlerView from "./shouldRenderCrawlerView.js";
import datasetView from "./crawlerViews/dataset.js";
import distributionView from "./crawlerViews/distribution.js";
Expand All @@ -13,6 +14,7 @@ const { safeLoadFront } = yamlFrontMatter;
/**
 * Options accepted by the crawler view router factory.
 */
type OptionType = {
    /**
     * Base URL of the registry API.
     * NOTE(review): how this is consumed is not visible in this excerpt; verify against the handlers.
     */
    registryApiBaseUrl: string;
    /**
     * Base path of the UI. It is prefixed directly onto `dataset/...` paths
     * when building page URLs, so it is presumably expected to end with `/`
     * (TODO confirm).
     */
    uiBaseUrl: string;
    /**
     * Externally visible base URL, used to turn UI paths into absolute
     * canonical / sitemap URLs. May be empty, in which case the sitemap URL
     * falls back to `/` as its base.
     */
    baseExternalUrl: string;
    /**
     * Toggles Discourse support.
     * NOTE(review): its effect on the router is not visible in this excerpt; verify against the route setup.
     */
    enableDiscourseSupport: boolean;
};

Expand All @@ -22,12 +24,17 @@ function getTenantIdFromReq(req: Request) {
: 0;
}

const createCralwerViewRouter = ({
const createCrawlerViewRouter = ({
enableDiscourseSupport,
registryApiBaseUrl,
uiBaseUrl
uiBaseUrl,
baseExternalUrl
}: OptionType) => {
const router: Router = Router();
const sitemapUrl = `${getAbsoluteUrl(
uiBaseUrl,
baseExternalUrl ? baseExternalUrl : "/"
)}sitemap.xml`;

async function datasetViewHandler(
req: Request<
Expand Down Expand Up @@ -70,7 +77,16 @@ const createCralwerViewRouter = ({
throw datasetData;
}
const content = safeLoadFront(datasetView(datasetData, uiBaseUrl));
res.send(commonView(content as any));
res.send(
commonView({
...content,
sitemapUrl,
canonicalUrl: getAbsoluteUrl(
`${uiBaseUrl}dataset/${datasetId}`,
baseExternalUrl
)
})
);
} catch (e) {
console.warn(
`Failed to producing crawler view for datasetId \`${datasetId}\`: ${
Expand Down Expand Up @@ -142,7 +158,16 @@ const createCralwerViewRouter = ({
const content = safeLoadFront(
distributionView(distributionData, datasetData, uiBaseUrl)
);
res.send(commonView(content as any));
res.send(
commonView({
...content,
sitemapUrl,
canonicalUrl: getAbsoluteUrl(
`${uiBaseUrl}dataset/${datasetId}/distribution/${distributionId}`,
baseExternalUrl
)
})
);
} catch (e) {
console.warn(
`Failed to producing crawler view for distributionId \`${distributionId}\`: ${
Expand All @@ -165,4 +190,4 @@ const createCralwerViewRouter = ({
return router;
};

export default createCralwerViewRouter;
export default createCrawlerViewRouter;
1 change: 1 addition & 0 deletions magda-web-server/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ if (argv.enableCrawlerViews || enableDiscourseSupport) {
createCrawlerViewRouter({
registryApiBaseUrl: argv.registryApiBaseUrlInternal,
enableDiscourseSupport: enableDiscourseSupport,
baseExternalUrl,
uiBaseUrl
})
);
Expand Down
1 change: 1 addition & 0 deletions magda-web-server/src/shouldRenderCrawlerView.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ const browserNames = [

const crawlerPatterns = [
"Googlebot\\/", // Google
"Google-InspectionTool\\/", // Google inspectionTool
"bingbot", // Bing
"Slurp", // Yahoo
"DuckDuckBot", // DuckDuckGo
Expand Down

0 comments on commit cff01be

Please sign in to comment.