PASTA-ELN · SteffenBrinckmann · Mar 14, 2024 · Mar 7, 2024 · Mar 12, 2024 · Mar 12, 2024
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,4 @@ htmlcov/
 pasta_eln/test.py
 pastaELN*.py
 .coverage
+profile.out
diff --git a/pasta_eln/Extractors/extractor_csv.py b/pasta_eln/Extractors/extractor_csv.py
@@ -15,11 +15,17 @@ def use(filePath, recipe='', saveFileName=None):
   Returns:
     dict: containing image, metaVendor, metaUser, recipe
   """
-  producer = ''
+  producer = 'comma separated'
+  delimiter = ','
   lines = []
+  skipRows = 0
   with open(filePath, encoding='unicode_escape') as fIn:
     for  _ in range(10):
-      lines.append(fIn.readline()[:-1])
+      line = fIn.readline()[:-1]
+      if line.startswith('#'):
+        skipRows+=1
+        continue
+      lines.append(line)
     # files with some form of header: try 3 criteria
     if lines[0].count(';')>lines[0].count(' ') and lines[0].count(';')==lines[1].count(';') and \
                                                    lines[0].count(';')==lines[2].count(';'): #Separate by ; not ' '
@@ -31,8 +37,8 @@ def use(filePath, recipe='', saveFileName=None):
       delimiter = ','
   print('Producer ', producer)
 
-  data = pd.read_csv(filePath, delimiter=delimiter)
-  plt.plot(data.iloc[:,1])
+  data = pd.read_csv(filePath, delimiter=delimiter, skiprows=skipRows-1)
+  plt.plot(data.iloc[:,0], data.iloc[:,1],'o-')
   metaUser = {}
   metaVendor = {}
   links = []

diff --git a/pasta_eln/GUI/details.py b/pasta_eln/GUI/details.py
@@ -97,7 +97,7 @@ def change(self, docID:str) -> None:
     if '-name' not in self.doc:  #keep empty details and wait for user to click
       self.comm.changeTable.emit('','')
       return
-    if self.doc['-type'][0]=='-':
+    if self.doc['-type'][0]=='-' or self.doc['-type'][0] not in self.comm.backend.db.dataHierarchy:
       dataHierarchyNode = defaultDataHierarchyNode
     else:
       dataHierarchyNode = self.comm.backend.db.dataHierarchy[self.doc['-type'][0]]['meta']
@@ -155,7 +155,7 @@ def change(self, docID:str) -> None:
       else:
         link = False
         dataHierarchyItem = [i for group in dataHierarchyNode for i in dataHierarchyNode[group] if i['name']==key]
-        if '\n' in self.doc[key]:     #if returns in value: format nicely
+        if isinstance(self.doc[key],str) and '\n' in self.doc[key]:     #if returns in value: format nicely
           labelW, labelL = widgetAndLayout('H', self.metaDetailsL, top='s', bottom='s')
           labelL.addWidget(QLabel(f'{key}: '), alignment=Qt.AlignTop) # type: ignore
           text = QTextEdit()

diff --git a/pasta_eln/GUI/sidebar.py b/pasta_eln/GUI/sidebar.py
@@ -119,19 +119,22 @@ def change(self, projectChoice:str='') -> None:
         self.widgetsList[projID] = listW
 
         # show folders as hierarchy
-        treeW = QTreeWidget()
-        treeW.hide()  #convenience: allow scroll in sidebar
-        treeW.setHeaderHidden(True)
-        treeW.setColumnCount(1)
-        treeW.itemClicked.connect(lambda item: self.execute([Command.SHOW_FOLDER, *item.text(1).split('/')]))
-        hierarchy = db.getHierarchy(projID)
-        rootItem = treeW.invisibleRootItem()
-        count = 0
-        for node in PreOrderIter(hierarchy, maxlevel=2):
-          if not node.is_root and node.id[0]=='x':
-            rootItem.insertChild(count, self.iterateTree(node, projID))
-            count += 1
-        projectL.addWidget(treeW)
+        # Commented out temporarily until getHierarchy is fast
+        # - parentNode = find_by_attr(dataTree, parentID, name='id')
+        # - is slow if many entries, find better system
+        # treeW = QTreeWidget()
+        # treeW.hide()  #convenience: allow scroll in sidebar
+        # treeW.setHeaderHidden(True)
+        # treeW.setColumnCount(1)
+        # treeW.itemClicked.connect(lambda item: self.execute([Command.SHOW_FOLDER, *item.text(1).split('/')]))
+        # hierarchy = db.getHierarchy(projID)
+        # rootItem = treeW.invisibleRootItem()
+        # count = 0
+        # for node in PreOrderIter(hierarchy, maxlevel=2):
+        #   if not node.is_root and node.id[0]=='x':
+        #     rootItem.insertChild(count, self.iterateTree(node, projID))
+        #     count += 1
+        # projectL.addWidget(treeW)
         # finalize layout
         self.projectsListL.addWidget(projectW)
     # Other buttons

diff --git a/pasta_eln/database.py b/pasta_eln/database.py
@@ -617,28 +617,34 @@ def getHierarchy(self, start:str, allItems:bool=False) -> Node:
       view = self.getView('viewHierarchy/viewHierarchyAll', startKey=start)
     # for item in view:
     #   print(item)
-    levelNum = 1
-    while True:
-      level = [i for i in view if len(i['key'].split())==levelNum]
-      if levelNum==1:
-        if len(level)==1:
-          value= level[0]['value']
-          dataTree = Node(id=level[0]['key'], docType=value[1], name=value[2], gui=value[3])
-        else:
-          print(f'**ERROR getHierarchy Did not find corresponding level={levelNum} under docID {start}')
-          dataTree = Node(id=None, name='')
+    # Reorganize data into lists
+    childNum = {i['id']:i['value'][0] for i in view}
+    # ids = [i['id'] for i in view]
+    keys = [i['key'] for i in view]
+    values = [i['value'] for i in view]
+    for k,v in childNum.items():
+      keys = [i.replace(k,f'{v} {k}') for i in keys]
+    values = [x for _, x in sorted(zip(keys, values))]
+    keys   = sorted(keys)
+    dataTree = None
+    hierarchy = []
+    for idx, value in enumerate(values):
+      docType = value[1]
+      name    = value[2]
+      gui     = value[3]
+      _id     = keys[idx].split()[-1]
+      level   = int(len(keys[idx].split())/2)
+      if idx==0:
+        dataTree = Node(id=_id, docType=docType, name=name, gui=gui)
+        hierarchy.append(dataTree)
       else:
-        childList = [i['value'][0] for i in level]   #temporary list to allow sorting for child-number
-        # https://stackoverflow.com/questions/6618515/sorting-list-based-on-values-from-another-list
-        for node in [x for (_,x) in sorted(zip(childList, level), key=lambda pair: pair[0])]:
-          parentID = node['key'].split()[-2]
-          parentNode = find_by_attr(dataTree, parentID, name='id')
-          value = node['value']
-          _ = Node(id=node['id'], parent=parentNode, docType=value[1], name=value[2], gui=value[3])
-      if not level: #if len(level)==0
-        break
-      levelNum += 1
-    # print(RenderTree(dataTree, style=AsciiStyle()))
+        parentNode = hierarchy[level-2]
+        subNode = Node(id=_id, parent=parentNode, docType=docType, name=name, gui=gui)
+        if len(hierarchy)<level:
+          hierarchy.append('')
+        elif len(hierarchy)>level:
+          hierarchy.pop()
+        hierarchy[-1] = subNode
     return dataTree
 
 

diff --git a/pasta_eln/inputOutput.py b/pasta_eln/inputOutput.py
@@ -11,11 +11,22 @@
 from .miscTools import createDirName, generic_hash
 
 # to discuss
+# - genre:docType, simulation, experiment/measurement;  status = Done, finished
+# - category: project
+# - root entry: authors list, single: @id; multiple authors
+#    - add several authors
+#    - one creator, multiple authors
 # - where to store additional metadata, not in ro-crate-metadata, separate files for each entry?
-# "ro-crate-metadata.json", "sdPublisher": "@id": or name
-# how to store different versions?
-# how should the folder structure be? kadi4mat, sampleDB, does-not-matter:
-# ro-crate.json: @type:Comment?
+#    - https://github.com/TheELNConsortium/TheELNFileFormat/issues/58
+# - how to store different versions?
+#    - history: last version
+#    - is based on, ro-crate id OR update action
+# - how should the folder structure be? kadi4mat, sampleDB, does-not-matter:
+# - sampleDB ro-crate.json: @type:comment!
+#   - ??
+# - in "ro-crate-metadata.json" / "sdPublisher": "@id": or name
+# - how to verify the import
+#   - import - export = the same
 
 # Always use RO-crate names
 # GENERAL TERMS IN ro-crate-metadata.json (None implies definitely should not be saved)
@@ -326,8 +337,7 @@ def processPart(part:dict[str,str]) -> int:
   #return to home stack and path
   backend.cwd = Path(backend.basePath)
   backend.hierStack = []
-  print(f'\n\nGraph in metadatafile\n{tree(graph)}')
-
+  print(f'\n\nGraph in metadata file\n{tree(graph)}')
   return f'Success: imported {str(addedDocuments)} documents from file {elnFileName} from ELN {elnName} {elnVersion}'
 
 
@@ -336,7 +346,7 @@ def processPart(part:dict[str,str]) -> int:
 ##########################################
 ###               EXPORT               ###
 ##########################################
-def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[]) -> str:
+def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[], verbose:bool=True) -> str:
   """
   export eln to file
 
@@ -345,6 +355,7 @@ def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=
     projectID (str): docId of project
     fileName (str): fileName which to use for saving; default='' saves in local folder
     dTypes (list): list of strings which should be included in the output, alongside folders x0 & x1; empty list=everything is exported
+    verbose (bool): if True, print the generated ro-crate-metadata.json content to stdout
 
   Returns:
     str: report of exportation
@@ -571,7 +582,8 @@ def iterateTree(nodeHier:Node, graph:list[dict[str,Any]]) -> Optional[str]:
     #finalize file
     index['@graph'] = graphMaster+graph+graphMisc
     elnFile.writestr(f'{dirNameProject}/ro-crate-metadata.json', json.dumps(index))
-    print(json.dumps(index, indent=3))
+    if verbose:
+      print(json.dumps(index, indent=3))
   # end writing zip file
   # temporary json output
   # with open(fileName[:-3]+'json','w', encoding='utf-8') as fOut:

diff --git a/pasta_eln/mixin_cli.py b/pasta_eln/mixin_cli.py
@@ -28,8 +28,12 @@ def output(self, docType, printID=False, **kwargs):
         outString.append(formatString.format(item['name'].replace('-','')) )
     outString = '|'.join(outString)+'\n'
     outString += '-'*104+'\n'
-    for lineItem in self.db.getView(f'viewDocType/{docType}'):
+    dataList  = self.db.getView(f'viewDocType/{docType}')
+    for lineNum, lineItem in enumerate(dataList):
       rowString = []
+      if lineNum>20:
+        outString += f'... continued for {len(dataList)} items in total\n'
+        break
       for idx, item in enumerate(i for group in self.db.dataHierarchy[docType]['meta']
                                  for i in self.db.dataHierarchy[docType]['meta'][group]):
         width = widthArray[idx] if idx<len(widthArray) else 0

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,3 +7,9 @@ skip = '.git,*.pdf,*.svg,*lookup*.json'
 ignore-regex = '\[[a-z]\][a-z]+'
 #
 ignore-words-list = 'speciall,ons,crate,ser'
+
+[tool.pytest.ini_options]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s|%(levelname)s:%(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%m-%d %H:%M:%S"