From 2d1b25e471d3a3db0f8b892c8ac32448b035ec0b Mon Sep 17 00:00:00 2001
From: Divya Goswami
Date: Wed, 4 Sep 2024 15:38:27 +0100
Subject: [PATCH] [URGENT] fixes empty data loading (#226)

* [URGENT] fixes empty data loading

Signed-off-by: rachejazz

* changed credentials

Signed-off-by: rachejazz

* removed json files references from classes

Signed-off-by: rachejazz

* removed json files references from config

Signed-off-by: rachejazz

* cleanup

Signed-off-by: rachejazz

* cleanup

Signed-off-by: rachejazz

* same host throughout

Signed-off-by: rachejazz

* Update database_build.py

Signed-off-by: Divya Goswami

* Update database_build.py

Signed-off-by: Divya Goswami

---------

Signed-off-by: rachejazz
Signed-off-by: Divya Goswami
---
 .env.example                  |   2 +-
 bin/database_build.py         |  28 ++++-----
 src/classes/package_search.py | 115 ----------------------------------
 src/main.py                   |   4 +-
 4 files changed, 15 insertions(+), 134 deletions(-)

diff --git a/.env.example b/.env.example
index 241eb29..22c25b7 100644
--- a/.env.example
+++ b/.env.example
@@ -2,4 +2,4 @@
 DB_HOST=localhost
 DB_USER=sdtreaduser
 DB_PASSWORD=UPDATEME
-DB_NAME=sdtDB
\ No newline at end of file
+DB_NAME=sdtDB
diff --git a/bin/database_build.py b/bin/database_build.py
index f457c0e..e3776be 100755
--- a/bin/database_build.py
+++ b/bin/database_build.py
@@ -4,14 +4,20 @@
 import pymysql
 import sys
 import os
+from dotenv import load_dotenv
+load_dotenv() # Load database name from the .env file
 
 sys.path.append('/opt/software-discovery-tool/src/config')
 import supported_distros
 SUPPORTED_DISTROS = supported_distros.SUPPORTED_DISTROS
 SDT_BASE = '/opt/software-discovery-tool'
 DATA_FILE_LOCATION = '%s/distro_data/data_files' % SDT_BASE
-HOST = 'localhost'
-DB_NAME = 'sdtDB'
+
+HOST = os.environ.get('DB_HOST')
+USER = ''
+PASSWORD = ''
+DB_NAME = os.environ.get('DB_NAME')
+
 def connectdb(username,password,database):
     conn = pymysql.connect(host=HOST,user=username,password=password)
     cur = conn.cursor()
@@ -21,20 +27,12 @@ def connectdb(username,password,database):
     print("DB INITIATILIZED SUCCESSFULLY")
 
 def db_init():
-    username = ""
+    username = USER
+    password = PASSWORD
     table_name = ""
-    if len(sys.argv)==2 and sys.argv[1]=='root':
-        username = sys.argv[1]
-    elif len(sys.argv)==2:
-        username = input("Enter username to use for connecting to MariaDB server : ")
-        table_name = sys.argv[1]
-    elif len(sys.argv)==3 and sys.argv[1]=='root':
-        username = sys.argv[1]
-        table_name = sys.argv[2]
-    else:
-        username = input("Enter username to use for connecting to MariaDB server : ")
-        password = input("Enter password for connecting to MariaDB server : ")
+    username = input("Enter privileged username to create/update SQL tables: ")
+    password = input("Enter password for privileged username: ")
     dbName = DB_NAME
 
     if table_name == "" or table_name == "all" or table_name == "All":
@@ -46,9 +44,7 @@
 def jsontosql(db,table,file,os,user,password):
     filepath = f'{DATA_FILE_LOCATION}/{file}.json'
     jsonFile = open(file=filepath)
-    #print(jsonFile)
     data = json.load(jsonFile)
-    #final_data = [dict(item, osName=os) for item in data]
     final_data = []
     for item in data :
         if item :
diff --git a/src/classes/package_search.py b/src/classes/package_search.py
index 657363e..99a857b 100644
--- a/src/classes/package_search.py
+++ b/src/classes/package_search.py
@@ -19,8 +19,6 @@ class PackageSearch:
     package_data = {}
-    local_cache ={}
-    cache_keys = []
     DISTRO_BIT_MAP = {}
     INSTANCE = None
@@ -52,84 +50,12 @@ def loadSupportedDistros(cls):
                 bitFlag += bitFlag
         return cls.DISTRO_BIT_MAP
-    @classmethod
-    def loadPackageData(cls):
-        '''
-        Returns list of Packages in software-discovery-tool
-        '''
-
-        LOGGER.debug('loadPackageData: In loadSupportedDistros')
-        distro_data_file = '%s/cached_data.json' % cls.getDataFilePath()
-        try:
-            json_data = json.load(open(distro_data_file))
-        except:
-            LOGGER.warn('loadPackageData: Loading cached distros data failed generating from scratch')
-            LOGGER.debug('loadPackageData: start writing distros data')
-            json_data = cls.preparePackageData()
-            cached_file = open(distro_data_file, 'w')
-            cached_file.write(json.dumps(json_data, indent=2, separators=(',', ': ')))
-            cached_file.close()
-            LOGGER.debug('loadPackageData: end writing distros data')
-
-        LOGGER.debug('loadPackageData: Loading supported distros data')
-
-        return json_data
-
-    @classmethod
-    def preparePackageData(cls):
-        data_dir = cls.getDataFilePath()
-        package_info = [];
-        package_data = {};
-        cachedPackage = {}
-
-        for distroName in list(SUPPORTED_DISTROS.keys()):
-            for distroVersion in sorted(SUPPORTED_DISTROS[distroName].keys()):
-                distro_file = SUPPORTED_DISTROS[distroName][distroVersion]
-
-                package_info = json.load(open('%s/%s' % (data_dir, distro_file)))
-                distro_file_name = distro_file
-
-                for pkg in package_info:
-                    try:
-                        pkg_key = pkg["packageName"] + '_' + pkg["version"]
-                    except Exception as ex:
-                        LOGGER.error('preparePackageData: key not found for package %s' % str(ex))
-                    if pkg_key not in package_data:
-                        cachedPackage = {}
-                        cachedPackage["P"] = pkg["packageName"]
-                        cachedPackage["S"] = cachedPackage["P"].lower().upper()
-                        cachedPackage["V"] = pkg["version"]
-                        if "description" in pkg:
-                            cachedPackage["D"] = pkg["description"]
-                        try:
-                            cachedPackage["B"] = cls.DISTRO_BIT_MAP[distroName][distroVersion]
-                        except Exception as e:
-                            raise #This occurrs only if there is a problem with how SUPPORTED_DISTROS is configured in config py
-
-                        cachedPackage[distroName] = [distroVersion]
-                        package_data[pkg_key] = cachedPackage
-                    else:
-                        if distroName not in package_data[pkg_key]:
-                            package_data[pkg_key][distroName] = [distroVersion]
-                            package_data[pkg_key]['B'] += cls.DISTRO_BIT_MAP[distroName][distroVersion]
-                        else:
-                            if distroVersion not in package_data[pkg_key][distroName]:
-                                package_data[pkg_key][distroName].append(distroVersion)
-                                package_data[pkg_key]['B'] += cls.DISTRO_BIT_MAP[distroName][distroVersion]
-
-        json_data = list(package_data.values())
-
-        return json_data
-
     @classmethod
     def get_instance(cls):
         LOGGER.debug('get_instance: In get_instance')
         if not cls.INSTANCE:
             cls.INSTANCE = PackageSearch()
             cls.INSTANCE.DISTRO_BIT_MAP = cls.loadSupportedDistros()
-            cls.INSTANCE.package_data = cls.loadPackageData()
-            cls.INSTANCE.local_cache = {}
-            cls.INSTANCE.cache_keys = []
             LOGGER.debug('get_instance: Creating singleton instance in get_instance')
         return cls.INSTANCE
@@ -170,50 +96,9 @@ def searchPackages(self, search_term, exact_match, search_bit_flag, page_number
         LOGGER.debug('searchPackages: search_packages_end_with : %s' % (search_packages_end_with))
         LOGGER.debug('searchPackages: search_anywhere_in_packages : %s' % (search_anywhere_in_packages))
-        cache_key = 'ck_%s_%s_%s' % (search_term, exact_match, search_bit_flag)
-        LOGGER.debug('searchPackages: Cache Key is : %s' % (cache_key))
-
         search_term = search_term.replace('*', '')
         search_term_ucase = search_term.upper()
-        preliminary_results = {}
-        if( (cache_key in self.INSTANCE.local_cache) == False ):
-            LOGGER.debug('searchPackages: Not available in cache, so make fresh search')
-            LOGGER.debug(self.INSTANCE.package_data)
-            if (exact_match == True):
-                LOGGER.debug('searchPackages: Doing exact search')
-                preliminary_results = [s for s in self.INSTANCE.package_data if s['P'] == search_term and (s['B'] & search_bit_flag) > 0]
-            elif search_anywhere_in_packages:
-                LOGGER.debug('searchPackages: Doing Anywhere Search')
-                preliminary_results = [s for s in self.INSTANCE.package_data if search_term_ucase in s['S'] and (s['B'] & search_bit_flag) > 0]
-            elif search_packages_begin_with:
-                LOGGER.debug('searchPackages: Find names that begin with')
-                preliminary_results = [s for s in self.INSTANCE.package_data if str(s['S']).startswith(search_term_ucase) and (s['B'] & search_bit_flag) > 0]
-            elif search_packages_end_with:
-                LOGGER.debug('searchPackages: Find names that end with')
-                preliminary_results = [s for s in self.INSTANCE.package_data if str(s['S']).endswith(search_term_ucase) and (s['B'] & search_bit_flag) > 0]
-
-            final_results = copy.deepcopy(preliminary_results); #Deep Copy is required since we just need to remove the "S" field from returnable result
-            for pkg in final_results:
-                del pkg['S']
-
-            LOGGER.debug('searchPackages: Search Results Length : %s' % (len(final_results)))
-
-            if(len(final_results) > MAX_RECORDS_TO_SEND): #This is a large result set so add it to cache
-                LOGGER.debug('searchPackages: Add results to cache')
-                if(len(list(self.INSTANCE.local_cache.keys())) >= CACHE_SIZE): #CACHE_SIZE is breached so remove oldest cached object
-                    #LOGGER.debug('searchPackages: Cache full. So remove the oldest item. Total of Cached Items: %s' % (len(self.INSTANCE.local_cache.keys()))
-                    self.INSTANCE.local_cache.pop(self.INSTANCE.cache_keys[0],None) #self.INSTANCE.cache_keys[0] has the Oldest Cache Key
-                    self.INSTANCE.cache_keys.remove(self.INSTANCE.cache_keys[0]) #Remoe the cache_key from cache_keys for it is removed from local_cache
-
-                LOGGER.debug('searchPackages: Add new Key to cache_keys for indexing.')
-                self.INSTANCE.cache_keys.append(cache_key) #append the new key to the list of cache_keys
-                self.INSTANCE.local_cache[cache_key] = final_results
-        else:
-            LOGGER.debug('searchPackages: Getting from cache')
-            final_results = self.INSTANCE.local_cache[cache_key];
-
-        LOGGER.debug('searchPackages: Cache Keys: %s' %(json.dumps(self.INSTANCE.cache_keys)))
         totalLength = len(final_results)
         last_page = math.ceil(totalLength/float(MAX_RECORDS_TO_SEND))
diff --git a/src/main.py b/src/main.py
index 8e19cca..17fd9e4 100644
--- a/src/main.py
+++ b/src/main.py
@@ -46,8 +46,8 @@ def searchPackages():
     search_bit_flag = int(request.args.get('search_bit_flag', '0'))
     page_number = int(request.args.get('page_number', '0'))
 
-    json_data = package_search.searchSQLPackages(search_term,exact_match,search_bit_flag,page_number)
-    resp = Response(json_data,mimetype="application/json")
+    sql_data = package_search.searchSQLPackages(search_term,exact_match,search_bit_flag,page_number)
+    resp = Response(sql_data,mimetype="application/json")
     resp.headers.set('Cache-Control','no-cache, no-store, must-revalidate')
     resp.headers.set('Pragma','no-cache')
     resp.headers.set('Expires','0')
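Usage sketch (illustrative only, not part of the commit): with this change, MariaDB connection settings come from .env via python-dotenv, bin/database_build.py prompts for a privileged account when (re)building tables, and the Flask route in src/main.py serves results through PackageSearch.searchSQLPackages() instead of the removed JSON cache. A minimal standalone check of the .env-driven connection could look like the following; the table name and column names are placeholders assumed for illustration, not values defined by this patch.

    import os

    import pymysql
    from dotenv import load_dotenv

    load_dotenv()  # reads DB_HOST / DB_USER / DB_PASSWORD / DB_NAME from .env

    conn = pymysql.connect(
        host=os.environ.get('DB_HOST', 'localhost'),
        user=os.environ.get('DB_USER'),
        password=os.environ.get('DB_PASSWORD'),
        database=os.environ.get('DB_NAME'),
    )
    try:
        with conn.cursor() as cur:
            # 'UBUNTU_24_04' is a placeholder table name and the column names are
            # assumed; real tables are created by database_build.py from the
            # SUPPORTED_DISTROS config and the JSON data files.
            cur.execute("SELECT packageName, version FROM UBUNTU_24_04 LIMIT 5")
            for row in cur.fetchall():
                print(row)
    finally:
        conn.close()

Keeping DB_HOST and DB_NAME in .env while prompting for the privileged account only in database_build.py keeps write credentials out of the runtime environment; the web app itself only needs the read-only user configured in .env.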