Skip to content

Commit

Permalink
fix(datasource/pypi): handle non-normalized package names for pypi si…
Browse files Browse the repository at this point in the history
…mple lookup (#30716)

Co-authored-by: Norbert Szulc <norbert@not7cd.net>
  • Loading branch information
Shegox and not7cd committed Aug 14, 2024
1 parent 833df8e commit 5ff0778
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 53 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>Links for invalid-version</title>
</head>
<body>
<h1>Links for invalid-version</h1>
<a href="">invalid-version-2.0.0.customZip</a><br>
<a href="">invalid-version-2.0.1.whl</a><br>
<a href="">invalid-version-2.0.2</a><br>
</body></html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

<!DOCTYPE html>
<html>
<head>
<meta name="pypi:repository-version" content="1.1">
<title>Links for snowflake-legacy</title>
</head>
<body>
<h1>Links for snowflake-legacy</h1>
<a href="https://files.pythonhosted.org/packages/86/dc/cb863039b01a9fa0627f00d7b751277f8c58c5214753943040dd70b7cefd/snowflake_legacy-0.3.0-py3-none-any.whl#sha256=ce59a956dccdbd8fe9cf2890cafef87d8213ee867f757adcfdc4b1ed642df325" data-requires-python="&lt;3.11,&gt;=3.8" data-dist-info-metadata="sha256=69da4682935afcaaa807279a8686996eb5bb735f854142bdc2fe2d05cbf074e1" data-core-metadata="sha256=69da4682935afcaaa807279a8686996eb5bb735f854142bdc2fe2d05cbf074e1">snowflake_legacy-0.3.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/85/30/e00b8c381602de12b17b56c9c26c933b59b88a4284df38b39134e3c2e668/snowflake_legacy-0.3.0.tar.gz#sha256=9940cacff79b542b8480a5adb3c463d4ac0e9a1320f518b00664ee8b3adcd559" data-requires-python="&lt;3.11,&gt;=3.8" >snowflake_legacy-0.3.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/62/6f/afcf46c8161f8ee12164820c9be2d07958a05d18ee128324d4e45b57cb9a/snowflake_legacy-0.4.0-py3-none-any.whl#sha256=af70468f73ccac77aca40042386fec8c6fae0f27046d1bf4f116e5020ede3fdb" data-requires-python="&lt;3.11,&gt;=3.8" data-dist-info-metadata="sha256=0c4a387857b5f407d4be3f47805be2fba0387d632cd69553584082fd37814b5a" data-core-metadata="sha256=0c4a387857b5f407d4be3f47805be2fba0387d632cd69553584082fd37814b5a">snowflake_legacy-0.4.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/19/0c/9d0b6b076bd1b28b59322b9ba8a677a0aa33bd19443121ade49eae861a46/snowflake_legacy-0.4.0.tar.gz#sha256=502a0dbdb2976cebc678aee257c5b20612050ae2f1397a29f501fd8c466037cf" data-requires-python="&lt;3.11,&gt;=3.8" >snowflake_legacy-0.4.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/db/80/61730dd5c387fc9d18fe3c21fca8b98bc00deb3de7dfcdb5324ee9059320/snowflake_legacy-0.5.0-py3-none-any.whl#sha256=0ccc59e012f2758c308b133e8aa26337848d7fa6a23ae9fa8400cf4a863b0e44" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=5253c2ad694e9a3594adb77fa2336f5bbc4ccd9bf3d0ee6a5e21afae6613aab4" data-core-metadata="sha256=5253c2ad694e9a3594adb77fa2336f5bbc4ccd9bf3d0ee6a5e21afae6613aab4">snowflake_legacy-0.5.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/bd/46/751cd520992330f6b93290ec5ac045e4990300a912e9f658f03469c76aa2/snowflake_legacy-0.5.0.tar.gz#sha256=cfee13233c218154b79ee663e487fd90676e7feaee3d4e33087439fa3c5eddf8" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.5.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/a8/88/26a8f562cda49d1b2aa65bddf30701d9b7f350b179257a6ee6cc16c11f5b/snowflake_legacy-0.7.0-py3-none-any.whl#sha256=1a28ac196f776a2ae2e017151448fbdd2fa51ae61e22291b48b90b757ee3b425" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=d600e41a421f7cde4ef4da4839538a84adf6a0c7ab3db52be9f23cd8a64bb312" data-core-metadata="sha256=d600e41a421f7cde4ef4da4839538a84adf6a0c7ab3db52be9f23cd8a64bb312">snowflake_legacy-0.7.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/a1/83/7f233601a56e93106bc79dd692f8b9b67f48358f42fbc05d7e88f6a62d3b/snowflake_legacy-0.7.0.tar.gz#sha256=f29f578d06ba037727cb58d988cfbd45b996a45d390b32fa6bb5c15c4178312d" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.7.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/75/c9/bf69f07cf4f23fba13bd79e8e93b3b8fa7914fa8e0ef66e42332cc02b6d5/snowflake_legacy-0.8.0-py3-none-any.whl#sha256=8db39572dcb79782fb6561960cd0988b5b9788e16c03a947c4e8304558140771" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=dec3f6e091f5dd4b5d44f3588fed71895a0164231ff9db3e97c6bd15518a1b56" data-core-metadata="sha256=dec3f6e091f5dd4b5d44f3588fed71895a0164231ff9db3e97c6bd15518a1b56">snowflake_legacy-0.8.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/16/cc/d3b5eb6d14c02b99bfeca54a2f8df6f5ad617359bcfe23c9a7d04ef25560/snowflake_legacy-0.8.0.tar.gz#sha256=37096bb0020a52d641683ac8a705122dcae4b79b45ad3289d115dd82ba102af5" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.8.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/6d/81/c90970eb8f9f3b195a50375dab3a1f0960054e2b1f553f62519f44a846e2/snowflake_legacy-0.8.1-py3-none-any.whl#sha256=62fe03c8f6b39f0cca4a18e478e664c35c513dd8157343ba67357c5104e69de7" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=1e6c9ca0616c372dc56e72c02b17e596851001bd1225369b8b680785e21519a0" data-core-metadata="sha256=1e6c9ca0616c372dc56e72c02b17e596851001bd1225369b8b680785e21519a0">snowflake_legacy-0.8.1-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/00/20/c589328c73f085e7557fd698239515bcba0aeda2d8e389c0b88eb3e8af37/snowflake_legacy-0.8.1.tar.gz#sha256=76b9e5bb51aba0bbbc402f1d691436941124110d58cc249cddd1c38a92095331" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.8.1.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/5f/6d/c516c8d8569304370b19fe3dd50921b803e932b7398cc71a90bcba125f62/snowflake_legacy-0.9.0-py3-none-any.whl#sha256=135a56afe8ed4e7859620f1718063bc028650e32562e2442f2c0af7897acb1d2" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=779accce3d8f1a7a159d6e26444d3bac238fa739678a2d788211067f66fc74f3" data-core-metadata="sha256=779accce3d8f1a7a159d6e26444d3bac238fa739678a2d788211067f66fc74f3">snowflake_legacy-0.9.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/68/45/804f173498ae5febbe2236848a116977a7c9fefa40a783a0fe0ed2b094c9/snowflake_legacy-0.9.0.tar.gz#sha256=e032a66e39c5dd2fdea756d9f28c2560f6fc321ef96afde9b7f9c5b33fb5adf0" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.9.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/92/f8/e3d2f7f6c60a3471583d1b29940f902b300a1585d50ab20e951ad4d240b1/snowflake_legacy-0.10.0-py3-none-any.whl#sha256=66f9e3f2a19bf4ff41212506980f65bd7e3fdd11cf1723f77377c255da272ab5" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=a6d219b7dd2858f5e63fa13c15df1a9f66b0804f3e02cfcb13e68db747ac41e7" data-core-metadata="sha256=a6d219b7dd2858f5e63fa13c15df1a9f66b0804f3e02cfcb13e68db747ac41e7">snowflake_legacy-0.10.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/7e/bb/32a559919ae29b2f5bbd7171a365916930d16e75046cff69b940868840dc/snowflake_legacy-0.10.0.tar.gz#sha256=94ebe66f9a45ac7c41f989c999fd36bf9dfaad15089565981e42aca6faef4b85" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.10.0.tar.gz</a><br />
<a href="https://files.pythonhosted.org/packages/92/2c/c37dd17e52442b0ff519ae55ed251ba5d421e0785892cba52540e6916610/snowflake_legacy-0.11.0-py3-none-any.whl#sha256=299cf923a26edf2566bda628e5f64458a7c10f19de24a93dd57d4de8a9032e0f" data-requires-python="&lt;3.12,&gt;=3.8" data-dist-info-metadata="sha256=b93d56b2179a74bb05aeda4fe3803244bdafe8e58868e5a20d28f3ea98b4f1d1" data-core-metadata="sha256=b93d56b2179a74bb05aeda4fe3803244bdafe8e58868e5a20d28f3ea98b4f1d1">snowflake_legacy-0.11.0-py3-none-any.whl</a><br />
<a href="https://files.pythonhosted.org/packages/33/21/8d43f1567ef3404ed76f955b81ae90c72539eab39b1fa915e8ec205c48c8/snowflake_legacy-0.11.0.tar.gz#sha256=37f9e99ae05b6b73bfd7e40ecec6c4ae4494fd0cd53464e728145a93c13dddb4" data-requires-python="&lt;3.12,&gt;=3.8" >snowflake_legacy-0.11.0.tar.gz</a><br />
</body>
</html>
<!--SERIAL 24295561-->
<!-- downloaded from https://pypi.org/simple/snowflake-legacy/ -->
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>Links for FrIeNdLy-._.-bArD</title>
</head>
<body>
<h1>Links for FrIeNdLy-._.-bArD</h1>
<a href="">FrIeNdLy-._.-bArD-2.0.0.tar.gz</a><br>
<a href="">FrIeNdLy-._.-bArD-2.0.1-py3-none-any.whl</a><br>
<a href="">FrIeNdLy-._.-bArD-2.0.2-py3-none-any.whl</a><br>
<a href="">FrIeNdLy-._.-bArD-2.0.2.tar.gz</a><br>
<a href="">friendly_bard-2.0.3.tar.gz</a><br>
<a href="">friendly_bard-2.0.4-py3-none-any.whl</a><br>
</body></html>
112 changes: 94 additions & 18 deletions lib/modules/datasource/pypi/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,9 @@ import * as hostRules from '../../../util/host-rules';
import { PypiDatasource } from '.';

const res1 = Fixtures.get('azure-cli-monitor.json');
const res2 = Fixtures.get('azure-cli-monitor-updated.json');
const htmlResponse = Fixtures.get('versions-html.html');
const badResponse = Fixtures.get('versions-html-badfile.html');
const dataRequiresPythonResponse = Fixtures.get(
'versions-html-data-requires-python.html',
);
const mixedHyphensResponse = Fixtures.get('versions-html-mixed-hyphens.html');
const mixedCaseResponse = Fixtures.get('versions-html-mixed-case.html');
const withPeriodsResponse = Fixtures.get('versions-html-with-periods.html');
const withWhitespacesResponse = Fixtures.get(
'versions-html-with-whitespaces.html',
);
const hyphensResponse = Fixtures.get('versions-html-hyphens.html');
const zipResponse = Fixtures.get('versions-archives.html');

const baseUrl = 'https://pypi.org/pypi';
const datasource = PypiDatasource.id;
Expand Down Expand Up @@ -117,7 +106,7 @@ describe('modules/datasource/pypi/index', () => {
httpMock
.scope('https://third-index/foo')
.get('/azure-cli-monitor/json')
.reply(200, res2);
.reply(200, Fixtures.get('azure-cli-monitor-updated.json'));
const config = {
registryUrls: [
'https://custom.pypi.net/foo',
Expand Down Expand Up @@ -340,7 +329,7 @@ describe('modules/datasource/pypi/index', () => {
httpMock
.scope('https://some.registry.org/simple/')
.get('/package-with-hyphens/')
.reply(200, hyphensResponse);
.reply(200, Fixtures.get('versions-html-hyphens.html'));
const config = {
registryUrls: ['https://some.registry.org/simple/'],
};
Expand All @@ -360,7 +349,7 @@ describe('modules/datasource/pypi/index', () => {
httpMock
.scope('https://some.registry.org/simple/')
.get('/company-aws-sso-client/')
.reply(200, zipResponse);
.reply(200, Fixtures.get('versions-archives.html'));
const config = {
registryUrls: ['https://some.registry.org/simple/'],
};
Expand All @@ -379,7 +368,7 @@ describe('modules/datasource/pypi/index', () => {
httpMock
.scope('https://some.registry.org/simple/')
.get('/image-collector/')
.reply(200, mixedHyphensResponse);
.reply(200, Fixtures.get('versions-html-mixed-hyphens.html'));
const config = {
registryUrls: ['https://some.registry.org/simple/'],
};
Expand Down Expand Up @@ -453,11 +442,98 @@ describe('modules/datasource/pypi/index', () => {
]);
});

// Looks up `package-with-periods` on a simple index; the mocked endpoint
// answers with the `withPeriodsResponse` fixture and three releases are
// expected back, in order.
it('process data from simple endpoint with periods when using normalized name', async () => {
  const registryUrls = ['https://some.registry.org/simple/'];

  httpMock
    .scope('https://some.registry.org/simple/')
    .get('/package-with-periods/')
    .reply(200, withPeriodsResponse);

  const res = await getPkgReleases({
    datasource,
    registryUrls,
    packageName: 'package-with-periods',
  });

  const expectedReleases = ['2.0.0', '2.0.1', '2.0.2'].map((version) => ({
    version,
  }));
  expect(res?.releases).toMatchObject(expectedReleases);
});

// Looks up the hyphenated name `snowflake-legacy` against a mocked simple
// index page. The expectation lists each version exactly once.
// NOTE(review): the fixture presumably is the pypi.org page captured in this
// change, whose links are spelled `snowflake_legacy-…` and list both a wheel
// and an sdist per version — confirm against the fixture file.
it('process data from simple endpoint for snowflake-legacy', async () => {
  const registryUrls = ['https://some.registry.org/simple/'];

  httpMock
    .scope('https://some.registry.org/simple/')
    .get('/snowflake-legacy/')
    .reply(200, Fixtures.get('versions-html-snowflake-legacy.html'));

  const res = await getPkgReleases({
    datasource,
    registryUrls,
    packageName: 'snowflake-legacy',
  });

  const expectedVersions = [
    '0.3.0',
    '0.4.0',
    '0.5.0',
    '0.7.0',
    '0.8.0',
    '0.8.1',
    '0.9.0',
    '0.10.0',
    '0.11.0',
  ];
  expect(res?.releases).toMatchObject(
    expectedVersions.map((version) => ({ version })),
  );
});

// Serves the `versions-html-invalid-version.html` fixture for
// `invalid-version` and expects the resulting release list to be empty.
it('ignores invalid distribution file name formats', async () => {
  const registryUrls = ['https://some.registry.org/simple/'];

  httpMock
    .scope('https://some.registry.org/simple/')
    .get('/invalid-version/')
    .reply(200, Fixtures.get('versions-html-invalid-version.html'));

  const res = await getPkgReleases({
    datasource,
    registryUrls,
    packageName: 'invalid-version',
  });

  const noReleases = [];
  expect(res?.releases).toMatchObject(noReleases);
});

// Requests `friendly-bard` from the mocked simple index and expects versions
// 2.0.0 through 2.0.4 to be returned.
// NOTE(review): the fixture presumably is the page in this change whose links
// spell the name as `FrIeNdLy-._.-bArD-…` and `friendly_bard-…` — confirm
// against the fixture file.
it('process data from simple endpoint with non normalized name', async () => {
  const registryUrls = ['https://some.registry.org/simple/'];
  const fixtureBody = Fixtures.get(
    'versions-html-with-non-normalized-name.html',
  );

  httpMock
    .scope('https://some.registry.org/simple/')
    .get('/friendly-bard/')
    .reply(200, fixtureBody);

  const res = await getPkgReleases({
    datasource,
    registryUrls,
    packageName: 'friendly-bard',
  });

  const expectedVersions = ['2.0.0', '2.0.1', '2.0.2', '2.0.3', '2.0.4'];
  expect(res?.releases).toMatchObject(
    expectedVersions.map((version) => ({ version })),
  );
});

it('process data from simple endpoint with extra whitespaces in html', async () => {
httpMock
.scope('https://some.registry.org/simple/')
.get('/package-with-whitespaces/')
.reply(200, withWhitespacesResponse);
.reply(200, Fixtures.get('versions-html-with-whitespaces.html'));
const config = {
registryUrls: ['https://some.registry.org/simple/'],
};
Expand Down Expand Up @@ -513,7 +589,7 @@ describe('modules/datasource/pypi/index', () => {
httpMock
.scope('https://some.registry.org/simple/')
.get('/dj-database-url/')
.reply(200, badResponse);
.reply(200, Fixtures.get('versions-html-badfile.html'));
const config = {
registryUrls: ['https://some.registry.org/simple/'],
};
Expand Down Expand Up @@ -551,7 +627,7 @@ describe('modules/datasource/pypi/index', () => {
httpMock
.scope('https://some.registry.org/simple/')
.get('/dj-database-url/')
.reply(200, dataRequiresPythonResponse);
.reply(200, Fixtures.get('versions-html-data-requires-python.html'));
const config = {
registryUrls: ['https://some.registry.org/simple/'],
};
Expand Down
63 changes: 28 additions & 35 deletions lib/modules/datasource/pypi/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export class PypiDatasource extends Datasource {
const hostUrl = ensureTrailingSlash(
registryUrl!.replace('https://pypi.org/simple', 'https://pypi.org/pypi'),
);
const normalizedLookupName = PypiDatasource.normalizeName(packageName);
const normalizedLookupName = normalizePythonDepName(packageName);

// not all simple indexes use this identifier, but most do
if (hostUrl.endsWith('/simple/') || hostUrl.endsWith('/+simple/')) {
Expand Down Expand Up @@ -82,10 +82,6 @@ export class PypiDatasource extends Datasource {
return dependency;
}

private static normalizeName(input: string): string {
return input.toLowerCase().replace(regEx(/_/g), '-');
}

private async getDependency(
packageName: string,
hostUrl: string,
Expand Down Expand Up @@ -180,42 +176,39 @@ export class PypiDatasource extends Datasource {
packageName: string,
): string | null {
// source packages
const srcText = PypiDatasource.normalizeName(text);
const lcText = text.toLowerCase();
const normalizedSrcText = normalizePythonDepName(text);
const srcPrefix = `${packageName}-`;

// source distribution format: `{name}-{version}.tar.gz` (https://packaging.python.org/en/latest/specifications/source-distribution-format/#source-distribution-file-name)
// binary distribution: `{distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl` (https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-name-convention)
// officially both `name` and `distribution` should be normalized and then the - replaced with _, but in reality this is not the case
// We therefore normalize the name we have (replacing `_-.` with -) and then check if the text starts with the normalized name

if (!normalizedSrcText.startsWith(srcPrefix)) {
return null;
}

// strip off the prefix using the prefix length as we may have normalized the srcPrefix/packageName
// We assume that neither the version nor the suffix contains multiple `-` like `0.1.2---rc1.tar.gz`
// and use the difference in length to strip off the prefix in case the name contains double `--` characters
const normalizedLengthDiff = lcText.length - normalizedSrcText.length;
const res = lcText.slice(srcPrefix.length + normalizedLengthDiff);

// source distribution
const srcSuffixes = ['.tar.gz', '.tar.bz2', '.tar.xz', '.zip', '.tgz'];
if (
srcText.startsWith(srcPrefix) &&
srcSuffixes.some((srcSuffix) => srcText.endsWith(srcSuffix))
) {
const res = srcText.replace(srcPrefix, '');
for (const suffix of srcSuffixes) {
if (res.endsWith(suffix)) {
// strip off the suffix using character length
return res.slice(0, -suffix.length);
}
}
const srcSuffix = srcSuffixes.find((suffix) => lcText.endsWith(suffix));
if (srcSuffix) {
// strip off the suffix using character length
return res.slice(0, -srcSuffix.length);
}

// pep-0427 wheel packages
// {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl.
// Also match the current wheel spec
// https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
// where any of -_. characters in {distribution} are replaced with _
const wheelText = text.toLowerCase();
const wheelPrefixWithPeriod =
packageName.replace(regEx(/[^\w\d.]+/g), '_') + '-';
const wheelPrefixWithoutPeriod =
packageName.replace(regEx(/[^\w\d]+/g), '_') + '-';
// binary distribution
// for binary distributions the version is the first part after the removed distribution name
const wheelSuffix = '.whl';
if (
(wheelText.startsWith(wheelPrefixWithPeriod) ||
wheelText.startsWith(wheelPrefixWithoutPeriod)) &&
wheelText.endsWith(wheelSuffix) &&
wheelText.split('-').length > 2
) {
return wheelText.split('-')[1];
if (lcText.endsWith(wheelSuffix) && lcText.split('-').length > 2) {
return res.split('-')[0];
}

return null;
}

Expand Down

0 comments on commit 5ff0778

Please sign in to comment.