From 71fa796efb3773bb0cd255dee6453a34c068f52c Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Thu, 7 Mar 2024 12:39:34 +0100 Subject: [PATCH 1/5] Speed up start by not calling unnecessary and slow function getHierarchy --- .gitignore | 1 + pasta_eln/GUI/sidebar.py | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 1cb2265e..a0e77f58 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ htmlcov/ pasta_eln/test.py pastaELN*.py .coverage +profile.out diff --git a/pasta_eln/GUI/sidebar.py b/pasta_eln/GUI/sidebar.py index 9b368162..d473ff03 100644 --- a/pasta_eln/GUI/sidebar.py +++ b/pasta_eln/GUI/sidebar.py @@ -119,19 +119,22 @@ def change(self, projectChoice:str='') -> None: self.widgetsList[projID] = listW # show folders as hierarchy - treeW = QTreeWidget() - treeW.hide() #convenience: allow scroll in sidebar - treeW.setHeaderHidden(True) - treeW.setColumnCount(1) - treeW.itemClicked.connect(lambda item: self.execute([Command.SHOW_FOLDER, *item.text(1).split('/')])) - hierarchy = db.getHierarchy(projID) - rootItem = treeW.invisibleRootItem() - count = 0 - for node in PreOrderIter(hierarchy, maxlevel=2): - if not node.is_root and node.id[0]=='x': - rootItem.insertChild(count, self.iterateTree(node, projID)) - count += 1 - projectL.addWidget(treeW) + # Commented out temporarily until getHierarchy is fast + # - parentNode = find_by_attr(dataTree, parentID, name='id') + # - is slow if many entries, find better system + # treeW = QTreeWidget() + # treeW.hide() #convenience: allow scroll in sidebar + # treeW.setHeaderHidden(True) + # treeW.setColumnCount(1) + # treeW.itemClicked.connect(lambda item: self.execute([Command.SHOW_FOLDER, *item.text(1).split('/')])) + # hierarchy = db.getHierarchy(projID) + # rootItem = treeW.invisibleRootItem() + # count = 0 + # for node in PreOrderIter(hierarchy, maxlevel=2): + # if not node.is_root and node.id[0]=='x': + # rootItem.insertChild(count, 
self.iterateTree(node, projID)) + # count += 1 + # projectL.addWidget(treeW) # finalize layout self.projectsListL.addWidget(projectW) # Other buttons From 27e262d8bf36a22f4b7b4270e4ef0fed4e1d25c1 Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Tue, 12 Mar 2024 08:47:28 +0100 Subject: [PATCH 2/5] towards speeding up getHierarchy --- pasta_eln/Extractors/extractor_csv.py | 14 ++++++++++---- pasta_eln/GUI/details.py | 4 ++-- pasta_eln/database.py | 7 ++++--- pasta_eln/inputOutput.py | 28 +++++++++++++++++++-------- pasta_eln/mixin_cli.py | 5 ++++- pyproject.toml | 6 ++++++ 6 files changed, 46 insertions(+), 18 deletions(-) diff --git a/pasta_eln/Extractors/extractor_csv.py b/pasta_eln/Extractors/extractor_csv.py index c3b89341..7779ed63 100644 --- a/pasta_eln/Extractors/extractor_csv.py +++ b/pasta_eln/Extractors/extractor_csv.py @@ -15,11 +15,17 @@ def use(filePath, recipe='', saveFileName=None): Returns: dict: containing image, metaVendor, metaUser, recipe """ - producer = '' + producer = 'comma separated' + delimiter = ',' lines = [] + skipRows = 0 with open(filePath, encoding='unicode_escape') as fIn: for _ in range(10): - lines.append(fIn.readline()[:-1]) + line = fIn.readline()[:-1] + if line.startswith('#'): + skipRows+=1 + continue + lines.append(line) # files with some form of header: try 3 criteria if lines[0].count(';')>lines[0].count(' ') and lines[0].count(';')==lines[1].count(';') and \ lines[0].count(';')==lines[2].count(';'): #Separate by ; not ' ' @@ -31,8 +37,8 @@ def use(filePath, recipe='', saveFileName=None): delimiter = ',' print('Producer ', producer) - data = pd.read_csv(filePath, delimiter=delimiter) - plt.plot(data.iloc[:,1]) + data = pd.read_csv(filePath, delimiter=delimiter, skiprows=skipRows-1) + plt.plot(data.iloc[:,0], data.iloc[:,1],'o-') metaUser = {} metaVendor = {} links = [] diff --git a/pasta_eln/GUI/details.py b/pasta_eln/GUI/details.py index eb1ec27a..e3651d04 100644 --- a/pasta_eln/GUI/details.py +++ 
b/pasta_eln/GUI/details.py @@ -97,7 +97,7 @@ def change(self, docID:str) -> None: if '-name' not in self.doc: #keep empty details and wait for user to click self.comm.changeTable.emit('','') return - if self.doc['-type'][0]=='-': + if self.doc['-type'][0]=='-' or self.doc['-type'][0] not in self.comm.backend.db.dataHierarchy: dataHierarchyNode = defaultDataHierarchyNode else: dataHierarchyNode = self.comm.backend.db.dataHierarchy[self.doc['-type'][0]]['meta'] @@ -155,7 +155,7 @@ def change(self, docID:str) -> None: else: link = False dataHierarchyItem = [i for group in dataHierarchyNode for i in dataHierarchyNode[group] if i['name']==key] - if '\n' in self.doc[key]: #if returns in value: format nicely + if isinstance(self.doc[key],str) and '\n' in self.doc[key]: #if returns in value: format nicely labelW, labelL = widgetAndLayout('H', self.metaDetailsL, top='s', bottom='s') labelL.addWidget(QLabel(f'{key}: '), alignment=Qt.AlignTop) # type: ignore text = QTextEdit() diff --git a/pasta_eln/database.py b/pasta_eln/database.py index 6e48fecb..5fbbd496 100644 --- a/pasta_eln/database.py +++ b/pasta_eln/database.py @@ -615,8 +615,8 @@ def getHierarchy(self, start:str, allItems:bool=False) -> Node: view = self.getView('viewHierarchy/viewHierarchy', startKey=start) if allItems or len(view)==0: view = self.getView('viewHierarchy/viewHierarchyAll', startKey=start) - # for item in view: - # print(item) + for item in view: + print(item) levelNum = 1 while True: level = [i for i in view if len(i['key'].split())==levelNum] @@ -634,7 +634,8 @@ def getHierarchy(self, start:str, allItems:bool=False) -> Node: parentID = node['key'].split()[-2] parentNode = find_by_attr(dataTree, parentID, name='id') value = node['value'] - _ = Node(id=node['id'], parent=parentNode, docType=value[1], name=value[2], gui=value[3]) + gui = value[3] if isinstance(value[3], list) else [True, True] + _ = Node(id=node['id'], parent=parentNode, docType=value[1], name=value[2], gui=gui) if not level: #if 
len(level)==0 break levelNum += 1 diff --git a/pasta_eln/inputOutput.py b/pasta_eln/inputOutput.py index 8acc24cb..cb29284a 100644 --- a/pasta_eln/inputOutput.py +++ b/pasta_eln/inputOutput.py @@ -11,11 +11,22 @@ from .miscTools import createDirName, generic_hash # to discuss +# - genre:docType, simulation, experiment/measurement; status = Done, finished +# - category: project +# - root entry: authors list, single: @id; multiple authors +# - add several authors +# - one creator, multiple authors # - where to store additional metadata, not in ro-crate-metadata, separate files for each entry? -# "ro-crate-metadata.json", "sdPublisher": "@id": or name -# how to store different versions? -# how should the folder structure be? kadi4mat, sampleDB, does-not-matter: -# ro-crate.json: @type:Comment? +# - https://github.com/TheELNConsortium/TheELNFileFormat/issues/58 +# - how to store different versions? +# - history: last version +# - is based on, ro-crate id OR update action +# - how should the folder structure be? kadi4mat, sampleDB, does-not-matter: +# - sampleDB ro-crate.json: @type:comment! +# - ?? 
+# - in "ro-crate-metadata.json" / "sdPublisher": "@id": or name +# - how to verify the import +# - import - export = the same # Always use RO-crate names # GENERAL TERMS IN ro-crate-metadata.json (None implies definitely should not be saved) @@ -326,8 +337,7 @@ def processPart(part:dict[str,str]) -> int: #return to home stack and path backend.cwd = Path(backend.basePath) backend.hierStack = [] - print(f'\n\nGraph in metadatafile\n{tree(graph)}') - + print(f'\n\nGraph in metadata file\n{tree(graph)}') return f'Success: imported {str(addedDocuments)} documents from file {elnFileName} from ELN {elnName} {elnVersion}' @@ -336,7 +346,7 @@ def processPart(part:dict[str,str]) -> int: ########################################## ### EXPORT ### ########################################## -def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[]) -> str: +def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[], verbose=True) -> str: """ export eln to file @@ -345,6 +355,7 @@ def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]= projectID (str): docId of project fileName (str): fileName which to use for saving; default='' saves in local folder dTypes (list): list of strings which should be included in the output, alongside folders x0 & x1; empty list=everything is exported + verbose (bool): verbose Returns: str: report of exportation @@ -571,7 +582,8 @@ def iterateTree(nodeHier:Node, graph:list[dict[str,Any]]) -> Optional[str]: #finalize file index['@graph'] = graphMaster+graph+graphMisc elnFile.writestr(f'{dirNameProject}/ro-crate-metadata.json', json.dumps(index)) - print(json.dumps(index, indent=3)) + if verbose: + print(json.dumps(index, indent=3)) # end writing zip file # temporary json output # with open(fileName[:-3]+'json','w', encoding='utf-8') as fOut: diff --git a/pasta_eln/mixin_cli.py b/pasta_eln/mixin_cli.py index f5bed863..9643793f 100644 --- a/pasta_eln/mixin_cli.py +++ 
b/pasta_eln/mixin_cli.py @@ -28,8 +28,11 @@ def output(self, docType, printID=False, **kwargs): outString.append(formatString.format(item['name'].replace('-','')) ) outString = '|'.join(outString)+'\n' outString += '-'*104+'\n' - for lineItem in self.db.getView(f'viewDocType/{docType}'): + for lineNum, lineItem in enumerate(self.db.getView(f'viewDocType/{docType}')): rowString = [] + if lineNum>100: + outString += 'continued...\n' + break for idx, item in enumerate(i for group in self.db.dataHierarchy[docType]['meta'] for i in self.db.dataHierarchy[docType]['meta'][group]): width = widthArray[idx] if idx Date: Tue, 12 Mar 2024 11:39:09 +0100 Subject: [PATCH 3/5] Faster getHierarchy --- pasta_eln/database.py | 53 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/pasta_eln/database.py b/pasta_eln/database.py index 5fbbd496..fb14b299 100644 --- a/pasta_eln/database.py +++ b/pasta_eln/database.py @@ -615,31 +615,36 @@ def getHierarchy(self, start:str, allItems:bool=False) -> Node: view = self.getView('viewHierarchy/viewHierarchy', startKey=start) if allItems or len(view)==0: view = self.getView('viewHierarchy/viewHierarchyAll', startKey=start) - for item in view: - print(item) - levelNum = 1 - while True: - level = [i for i in view if len(i['key'].split())==levelNum] - if levelNum==1: - if len(level)==1: - value= level[0]['value'] - dataTree = Node(id=level[0]['key'], docType=value[1], name=value[2], gui=value[3]) - else: - print(f'**ERROR getHierarchy Did not find corresponding level={levelNum} under docID {start}') - dataTree = Node(id=None, name='') + # for item in view: + # print(item) + # Reorganize data into lists + childNum = {i['id']:i['value'][0] for i in view} + # ids = [i['id'] for i in view] + keys = [i['key'] for i in view] + values = [i['value'] for i in view] + for k,v in childNum.items(): + keys = [i.replace(k,f'{v} {k}') for i in keys] + values = [x for _, x in sorted(zip(keys, values))] + keys = 
sorted(keys) + dataTree = None + hierarchy = [] + for idx, value in enumerate(values): + docType = value[1] + name = value[2] + gui = value[3] + _id = keys[idx].split()[-1] + level = int(len(keys[idx].split())/2) + if idx==0: + dataTree = Node(id=_id, docType=docType, name=name, gui=gui) + hierarchy.append(dataTree) else: - childList = [i['value'][0] for i in level] #temporary list to allow sorting for child-number - # https://stackoverflow.com/questions/6618515/sorting-list-based-on-values-from-another-list - for node in [x for (_,x) in sorted(zip(childList, level), key=lambda pair: pair[0])]: - parentID = node['key'].split()[-2] - parentNode = find_by_attr(dataTree, parentID, name='id') - value = node['value'] - gui = value[3] if isinstance(value[3], list) else [True, True] - _ = Node(id=node['id'], parent=parentNode, docType=value[1], name=value[2], gui=gui) - if not level: #if len(level)==0 - break - levelNum += 1 - # print(RenderTree(dataTree, style=AsciiStyle())) + parentNode = hierarchy[level-2] + subNode = Node(id=_id, parent=parentNode, docType=docType, name=name, gui=gui) + if len(hierarchy)level: + hierarchy.pop() + hierarchy[-1] = subNode return dataTree From 70430a0fa776505ed1901c9e704cc1ed9095e084 Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Tue, 12 Mar 2024 11:53:42 +0100 Subject: [PATCH 4/5] Improve table CLI output --- pasta_eln/mixin_cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pasta_eln/mixin_cli.py b/pasta_eln/mixin_cli.py index 9643793f..9521d52e 100644 --- a/pasta_eln/mixin_cli.py +++ b/pasta_eln/mixin_cli.py @@ -28,10 +28,11 @@ def output(self, docType, printID=False, **kwargs): outString.append(formatString.format(item['name'].replace('-','')) ) outString = '|'.join(outString)+'\n' outString += '-'*104+'\n' - for lineNum, lineItem in enumerate(self.db.getView(f'viewDocType/{docType}')): + dataList = self.db.getView(f'viewDocType/{docType}') + for lineNum, lineItem in enumerate(dataList): 
rowString = [] - if lineNum>100: - outString += 'continued...\n' + if lineNum>20: + outString += f'... continued for {len(dataList)} items in total\n' break for idx, item in enumerate(i for group in self.db.dataHierarchy[docType]['meta'] for i in self.db.dataHierarchy[docType]['meta'][group]): From e55d11b5375a6a40319d6039b9cf94f1cec4d603 Mon Sep 17 00:00:00 2001 From: Steffen Brinckmann Date: Tue, 12 Mar 2024 11:58:53 +0100 Subject: [PATCH 5/5] mypy fix --- pasta_eln/inputOutput.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pasta_eln/inputOutput.py b/pasta_eln/inputOutput.py index cb29284a..f85b5e39 100644 --- a/pasta_eln/inputOutput.py +++ b/pasta_eln/inputOutput.py @@ -346,7 +346,7 @@ def processPart(part:dict[str,str]) -> int: ########################################## ### EXPORT ### ########################################## -def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[], verbose=True) -> str: +def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[], verbose:bool=True) -> str: """ export eln to file