Skip to content

Commit

Permalink
Merge pull request #121 from zvigrinberg/fix/improve-python-performance
Browse files Browse the repository at this point in the history
feat: fix performance issue in python version >= 3.11.X
Fixes: https://issues.redhat.com/browse/APPENG-2154
  • Loading branch information
zvigrinberg authored Mar 13, 2024
2 parents 860a955 + 1fc3495 commit 0a02893
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 62 deletions.
9 changes: 5 additions & 4 deletions src/analysis.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ async function validateToken(url, opts = {}) {
...getTokenHeaders(opts),
}
})
let exRequestId = resp.headers.get("ex-request-id");
if(exRequestId)
{
console.log("Unique Identifier associated with this request - ex-request-id=" + exRequestId)
if (process.env["EXHORT_DEBUG"] === "true") {
let exRequestId = resp.headers.get("ex-request-id");
if (exRequestId) {
console.log("Unique Identifier associated with this request - ex-request-id=" + exRequestId)
}
}
return resp.status
}
Expand Down
121 changes: 98 additions & 23 deletions src/providers/python_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,24 +161,54 @@ export default class Python_controller {
}
#getDependenciesImpl(includeTransitive) {
let dependencies = new Array()
let freezeOutput = getPipFreezeOutput.call(this);
//debug
// freezeOutput = "alternative pip freeze output goes here for debugging"
let lines = freezeOutput.split(EOL)
let depNames = lines.map( line => getDependencyName(line)).join(" ")
let pipShowOutput = getPipShowOutput.call(this, depNames);
let usePipDepTree = getCustom("EXHORT_PIP_USE_DEP_TREE","false",this.options);
let freezeOutput
let lines
let depNames
let pipShowOutput
let allPipShowDeps
let pipDepTreeJsonArrayOutput
if(usePipDepTree !== "true") {
freezeOutput = getPipFreezeOutput.call(this);
lines = freezeOutput.split(EOL)
depNames = lines.map( line => getDependencyName(line)).join(" ")
}
else {
pipDepTreeJsonArrayOutput = getDependencyTreeJsonFromPipDepTree(this.pathToPipBin,this.pathToPythonBin)
}


if(usePipDepTree !== "true") {
pipShowOutput = getPipShowOutput.call(this, depNames);
allPipShowDeps = pipShowOutput.split( EOL +"---" + EOL);
}
//debug
// pipShowOutput = "alternative pip show output goes here for debugging"
let allPipShowDeps = pipShowOutput.split( EOL +"---" + EOL);

let matchManifestVersions = getCustom("MATCH_MANIFEST_VERSIONS","true",this.options);
let linesOfRequirements = fs.readFileSync(this.pathToRequirements).toString().split(EOL).filter( (line) => !line.startsWith("#")).map(line => line.trim())
let CachedEnvironmentDeps = {}
allPipShowDeps.forEach( (record) => {
let dependencyName = getDependencyNameShow(record).toLowerCase()
CachedEnvironmentDeps[dependencyName] = record
CachedEnvironmentDeps[dependencyName.replace("-","_")] = record
CachedEnvironmentDeps[dependencyName.replace("_","-")] = record
})
if(usePipDepTree !== "true") {
allPipShowDeps.forEach((record) => {
let dependencyName = getDependencyNameShow(record).toLowerCase()
CachedEnvironmentDeps[dependencyName] = record
CachedEnvironmentDeps[dependencyName.replace("-", "_")] = record
CachedEnvironmentDeps[dependencyName.replace("_", "-")] = record
})
}
else {
pipDepTreeJsonArrayOutput.forEach( depTreeEntry => {
let packageName = depTreeEntry["package"]["package_name"].toLowerCase()
let pipDepTreeEntryForCache = {
name: packageName,
version: depTreeEntry["package"]["installed_version"],
dependencies: depTreeEntry["dependencies"].map(dep => dep["package_name"])
};
CachedEnvironmentDeps[packageName] = pipDepTreeEntryForCache
CachedEnvironmentDeps[packageName.replace("-", "_")] = pipDepTreeEntryForCache
CachedEnvironmentDeps[packageName.replace("_", "-")] = pipDepTreeEntryForCache
})
}
linesOfRequirements.forEach( (dep) => {
// if matchManifestVersions setting is turned on , then
if(matchManifestVersions === "true")
Expand All @@ -199,7 +229,12 @@ export default class Python_controller {
dependencyName = getDependencyName(dep)
// only compare between declared version in manifest to installed version , if the package is installed.
if(CachedEnvironmentDeps[dependencyName.toLowerCase()] !== undefined) {
installedVersion = getDependencyVersion(CachedEnvironmentDeps[dependencyName.toLowerCase()])
if(usePipDepTree !== "true") {
installedVersion = getDependencyVersion(CachedEnvironmentDeps[dependencyName.toLowerCase()])
}
else {
installedVersion = CachedEnvironmentDeps[dependencyName.toLowerCase()].version
}
}
if(installedVersion) {
if (manifestVersion.trim() !== installedVersion.trim()) {
Expand All @@ -213,7 +248,7 @@ export default class Python_controller {
let depName = getDependencyName(dep)
//array to track a path for each branch in the dependency tree
path.push(depName.toLowerCase())
bringAllDependencies(dependencies,depName,CachedEnvironmentDeps,includeTransitive,path)
bringAllDependencies(dependencies,depName,CachedEnvironmentDeps,includeTransitive,path,usePipDepTree)
})
dependencies.sort((dep1,dep2) =>{
const DEP1 = dep1.name.toLowerCase()
Expand Down Expand Up @@ -285,25 +320,36 @@ function getDepsList(record) {
* @param dependencyName
* @param cachedEnvironmentDeps
* @param includeTransitive
* @param usePipDepTree
* @param {[string]}path array representing the path of the current branch in dependency tree, starting with a root dependency - that is - a given dependency in requirements.txt
*/
function bringAllDependencies(dependencies, dependencyName, cachedEnvironmentDeps, includeTransitive,path) {
function bringAllDependencies(dependencies, dependencyName, cachedEnvironmentDeps, includeTransitive, path, usePipDepTree) {
if(dependencyName === null || dependencyName === undefined || dependencyName.trim() === "" ) {
return
}
let record = cachedEnvironmentDeps[dependencyName.toLowerCase()]
if(record === null || record === undefined) {
throw new Error(`Package name=>${dependencyName} is not installed on your python environment,
either install it ( better to install requirements.txt altogether) or turn on
environment variable EXHORT_PYTHON_VIRTUAL_ENV=true to automatically installs
either install it ( better to install requirements.txt altogether) or set
setting EXHORT_PYTHON_VIRTUAL_ENV=true to automatically installs
it on virtual environment ( will slow down the analysis) `)
}

let version = getDependencyVersion(record)
let directDeps = getDepsList(record)
let depName
let version;
let directDeps
if(usePipDepTree !== "true") {
depName = getDependencyNameShow(record)
version = getDependencyVersion(record);
directDeps = getDepsList(record)
}
else {
depName = record.name
version = record.version
directDeps = record.dependencies
}
let targetDeps = new Array()

let entry = { "name" : getDependencyNameShow(record) , "version" : version, "dependencies" : [] }
let entry = { "name" : depName , "version" : version, "dependencies" : [] }
dependencies.push(entry)
directDeps.forEach( (dep) => {
let depArray = new Array()
Expand All @@ -313,7 +359,7 @@ function bringAllDependencies(dependencies, dependencyName, cachedEnvironmentDep
depArray.push(dep.toLowerCase())
if (includeTransitive) {
// recurse with the array of all deps in the path plus the current dependency name, which is not yet on the path.
bringAllDependencies(targetDeps, dep, cachedEnvironmentDeps, includeTransitive,path.concat(depArray))
bringAllDependencies(targetDeps, dep, cachedEnvironmentDeps, includeTransitive, path.concat(depArray), usePipDepTree)
}
}
// sort ra
Expand All @@ -332,3 +378,32 @@ function bringAllDependencies(dependencies, dependencyName, cachedEnvironmentDep
entry["dependencies"] = targetDeps
})
}

/**
 * Fetch the dependency tree of a python environment in json format using the pipdeptree utility.
 * The function first runs `<pipPath> install pipdeptree` and then invokes `pipdeptree --json`.
 * @param {string} pipPath - the filesystem path location of pip binary
 * @param {string} pythonPath - the filesystem path location of python binary
 * @return {Object[]} json array containing objects with the packages and their dependencies from pipdeptree utility
 * @throws {Error} if pipdeptree cannot be installed, or if it fails to produce the dependency tree;
 * the underlying error is attached as `cause`.
 * @private
 */
function getDependencyTreeJsonFromPipDepTree(pipPath, pythonPath) {
	let dependencyTree
	// NOTE(review): pipPath and pythonPath are interpolated into shell commands as-is - paths containing
	// spaces or shell metacharacters will not work; confirm callers only pass trusted, simple paths.
	try {
		execSync(`${pipPath} install pipdeptree`)
	} catch (e) {
		// JavaScript errors expose the text through `message`; `getMessage` does not exist on Error objects.
		throw new Error(`Couldn't install pipdeptree utility, reason: ${e.message}`, { cause: e })
	}

	try {
		if (pythonPath.startsWith("python")) {
			// a bare "python..." command name (not a path) - run pipdeptree without the --python flag
			dependencyTree = execSync(`pipdeptree --json`).toString()
		}
		else {
			// any other value is passed to pipdeptree through its --python flag
			dependencyTree = execSync(`pipdeptree --json --python ${pythonPath} `).toString()
		}
	} catch (e) {
		throw new Error(`couldn't produce dependency tree using pipdeptree tool, stop analysis, message -> ${e.message}`, { cause: e })
	}

	return JSON.parse(dependencyTree)
}
2 changes: 1 addition & 1 deletion src/providers/python_pip.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export default { isSupported, provideComponent, provideStack }

const dummyVersionNotation = "dummy*=#?";

/** @typedef {{name: string, , version: string, dependencies: DependencyEntry[]}} DependencyEntry */
/** @typedef {{name: string, version: string, dependencies: DependencyEntry[]}} DependencyEntry */

/**
* @type {string} ecosystem for python-pip is 'pip'
Expand Down
93 changes: 59 additions & 34 deletions test/providers/python_pip.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,48 @@ import {getCustomPath } from "../../src/tools.js"


let clock

/**
 * Shared flow for the component analysis tests: loads the expected SBOM and the requirements.txt
 * of the given scenario, runs the provider's component analysis and compares the result.
 * @param {string} testCase - name of the scenario directory under test/providers/tst_manifests/pip
 * @param {boolean} usePipDepTreeUtility - value passed to the provider as the EXHORT_PIP_USE_DEP_TREE option
 * @return {Promise<void>}
 */
async function sharedComponentAnalysisTestFlow(testCase, usePipDepTreeUtility) {
	const scenarioDir = `test/providers/tst_manifests/pip/${testCase}`
	// expected sbom, normalized by a parse/stringify round trip
	const rawExpected = fs.readFileSync(`${scenarioDir}/expected_component_sbom.json`).toString().trim()
	const expectedContent = JSON.stringify(JSON.parse(rawExpected))
	// manifest content handed to the provider under test
	const manifest = fs.readFileSync(`${scenarioDir}/requirements.txt`).toString()
	const actual = await pythonPip.provideComponent(manifest, { "EXHORT_PIP_USE_DEP_TREE": usePipDepTreeUtility })
	// the provider output must match the expectation exactly
	expect(actual).to.deep.equal({
		ecosystem: 'pip',
		contentType: 'application/vnd.cyclonedx+json',
		content: expectedContent
	})
}

/**
 * Shared flow for the stack analysis tests: loads the expected SBOM of the given scenario, installs
 * the scenario's requirements.txt with pip, runs the provider's stack analysis and compares the result.
 * @param {string} testCase - name of the scenario directory under test/providers/tst_manifests/pip
 * @param {boolean} usePipDepTreeUtility - value passed to the provider as the EXHORT_PIP_USE_DEP_TREE option
 * @return {Promise<void>}
 * @throws {Error} if installing the scenario's requirements.txt fails
 */
async function sharedStackAnalysisTestFlow(testCase, usePipDepTreeUtility) {
	const manifestPath = `test/providers/tst_manifests/pip/${testCase}/requirements.txt`
	// load the expected graph for the scenario, normalized by a parse/stringify round trip
	let expectedSbom = fs.readFileSync(`test/providers/tst_manifests/pip/${testCase}/expected_stack_sbom.json`).toString()
	expectedSbom = JSON.stringify(JSON.parse(expectedSbom))
	// install the scenario's packages so the provider can resolve them from the environment
	let pipPath = getCustomPath("pip3")
	try {
		// execSync is synchronous and takes no callback - failures are reported by throwing
		execSync(`${pipPath} install -r ${manifestPath}`)
	} catch (err) {
		throw new Error('fail installing requirements.txt manifest in created virtual python environment --> ' + err.message, { cause: err })
	}
	// invoke sut stack analysis for scenario manifest
	let opts = { "EXHORT_PIP_USE_DEP_TREE" : usePipDepTreeUtility }
	let providedDataForStack = await pythonPip.provideStack(manifestPath, opts)
	// verify returned data matches expectation
	expect(providedDataForStack).to.deep.equal({
		ecosystem: 'pip',
		contentType: 'application/vnd.cyclonedx+json',
		content: expectedSbom
	})
}

suite('testing the python-pip data provider', () => {
[
{name: 'requirements.txt', expected: true},
Expand All @@ -24,45 +66,28 @@ suite('testing the python-pip data provider', () => {
].forEach(testCase => {
let scenario = testCase.replace('pip_requirements_', '').replaceAll('_', ' ')
test(`verify requirements.txt sbom provided for stack analysis with scenario ${scenario}`, async () => {
// load the expected graph for the scenario
let expectedSbom = fs.readFileSync(`test/providers/tst_manifests/pip/${testCase}/expected_stack_sbom.json`,).toString()
expectedSbom = JSON.stringify(JSON.parse(expectedSbom))
// invoke sut stack analysis for scenario manifest
let pipPath = getCustomPath("pip3");
execSync(`${pipPath} install -r test/providers/tst_manifests/pip/${testCase}/requirements.txt`, err => {
if (err) {
throw new Error('fail installing requirements.txt manifest in created virtual python environment --> ' + err.message)
}
})
let providedDataForStack = await pythonPip.provideStack(`test/providers/tst_manifests/pip/${testCase}/requirements.txt`)
// new(year: number, month: number, date?: number, hours?: number, minutes?: number, seconds?: number, ms?: number): Date

// providedDataForStack.content = providedDataForStack.content.replaceAll("\"timestamp\":\"[a-zA-Z0-9\\-\\:]+\"","")
// verify returned data matches expectation
expect(providedDataForStack).to.deep.equal({
ecosystem: 'pip',
contentType: 'application/vnd.cyclonedx+json',
content: expectedSbom
})
// these test cases take ~2500-2700 ms each, or >10000 ms in CI (for the first test case)
await sharedStackAnalysisTestFlow(testCase,false);
// these test cases take ~2500-2700 ms each, or >10000 ms in CI (for the first test case)
}).timeout(process.env.GITHUB_ACTIONS ? 30000 : 10000)

test(`verify requirements.txt sbom provided for component analysis with scenario ${scenario}`, async () => {
// load the expected list for the scenario
let expectedSbom = fs.readFileSync(`test/providers/tst_manifests/pip/${testCase}/expected_component_sbom.json`,).toString().trim()
expectedSbom = JSON.stringify(JSON.parse(expectedSbom))
// read target manifest file
let manifestContent = fs.readFileSync(`test/providers/tst_manifests/pip/${testCase}/requirements.txt`).toString()
// invoke sut stack analysis for scenario manifest
let providedDatForComponent = await pythonPip.provideComponent(manifestContent)
// verify returned data matches expectation
expect(providedDatForComponent).to.deep.equal({
ecosystem: 'pip',
contentType: 'application/vnd.cyclonedx+json',
content: expectedSbom
})
await sharedComponentAnalysisTestFlow(testCase,false);
// these test cases take ~1400-2000 ms each, or >10000 ms in CI (for the first test case)
}).timeout(process.env.GITHUB_ACTIONS ? 15000 : 10000)

test(`verify requirements.txt sbom provided for stack analysis using pipdeptree utility with scenario ${scenario}`, async () => {
await sharedStackAnalysisTestFlow(testCase,true);
// these test cases take ~2500-2700 ms each, or >10000 ms in CI (for the first test case)
}).timeout(process.env.GITHUB_ACTIONS ? 30000 : 10000)

test(`verify requirements.txt sbom provided for component analysis using pipdeptree utility with scenario ${scenario}`, async () => {
await sharedComponentAnalysisTestFlow(testCase,true);
// these test cases take ~1400-2000 ms each, or >10000 ms in CI (for the first test case)
}).timeout(process.env.GITHUB_ACTIONS ? 15000 : 10000)




});

}).beforeAll(() => clock = sinon.useFakeTimers(new Date('2023-10-01T00:00:00.000Z'))).afterAll(()=> clock.restore());
Expand Down

0 comments on commit 0a02893

Please sign in to comment.