Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sb speed up start #214

Merged
merged 6 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ htmlcov/
pasta_eln/test.py
pastaELN*.py
.coverage
profile.out
14 changes: 10 additions & 4 deletions pasta_eln/Extractors/extractor_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,17 @@ def use(filePath, recipe='', saveFileName=None):
Returns:
dict: containing image, metaVendor, metaUser, recipe
"""
producer = ''
producer = 'comma separated'
delimiter = ','
lines = []
skipRows = 0
with open(filePath, encoding='unicode_escape') as fIn:
for _ in range(10):
lines.append(fIn.readline()[:-1])
line = fIn.readline()[:-1]
if line.startswith('#'):
skipRows+=1
continue
lines.append(line)
# files with some form of header: try 3 criteria
if lines[0].count(';')>lines[0].count(' ') and lines[0].count(';')==lines[1].count(';') and \
lines[0].count(';')==lines[2].count(';'): #Separate by ; not ' '
Expand All @@ -31,8 +37,8 @@ def use(filePath, recipe='', saveFileName=None):
delimiter = ','
print('Producer ', producer)

data = pd.read_csv(filePath, delimiter=delimiter)
plt.plot(data.iloc[:,1])
data = pd.read_csv(filePath, delimiter=delimiter, skiprows=skipRows-1)
plt.plot(data.iloc[:,0], data.iloc[:,1],'o-')
metaUser = {}
metaVendor = {}
links = []
Expand Down
4 changes: 2 additions & 2 deletions pasta_eln/GUI/details.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def change(self, docID:str) -> None:
if '-name' not in self.doc: #keep empty details and wait for user to click
self.comm.changeTable.emit('','')
return
if self.doc['-type'][0]=='-':
if self.doc['-type'][0]=='-' or self.doc['-type'][0] not in self.comm.backend.db.dataHierarchy:
dataHierarchyNode = defaultDataHierarchyNode
else:
dataHierarchyNode = self.comm.backend.db.dataHierarchy[self.doc['-type'][0]]['meta']
Expand Down Expand Up @@ -155,7 +155,7 @@ def change(self, docID:str) -> None:
else:
link = False
dataHierarchyItem = [i for group in dataHierarchyNode for i in dataHierarchyNode[group] if i['name']==key]
if '\n' in self.doc[key]: #if returns in value: format nicely
if isinstance(self.doc[key],str) and '\n' in self.doc[key]: #if returns in value: format nicely
labelW, labelL = widgetAndLayout('H', self.metaDetailsL, top='s', bottom='s')
labelL.addWidget(QLabel(f'{key}: '), alignment=Qt.AlignTop) # type: ignore
text = QTextEdit()
Expand Down
29 changes: 16 additions & 13 deletions pasta_eln/GUI/sidebar.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,22 @@ def change(self, projectChoice:str='') -> None:
self.widgetsList[projID] = listW

# show folders as hierarchy
treeW = QTreeWidget()
treeW.hide() #convenience: allow scroll in sidebar
treeW.setHeaderHidden(True)
treeW.setColumnCount(1)
treeW.itemClicked.connect(lambda item: self.execute([Command.SHOW_FOLDER, *item.text(1).split('/')]))
hierarchy = db.getHierarchy(projID)
rootItem = treeW.invisibleRootItem()
count = 0
for node in PreOrderIter(hierarchy, maxlevel=2):
if not node.is_root and node.id[0]=='x':
rootItem.insertChild(count, self.iterateTree(node, projID))
count += 1
projectL.addWidget(treeW)
# Temporarily commented out until getHierarchy is fast enough:
# - parentNode = find_by_attr(dataTree, parentID, name='id')
# - is slow when there are many entries; a better lookup system is needed
# treeW = QTreeWidget()
# treeW.hide() #convenience: allow scroll in sidebar
# treeW.setHeaderHidden(True)
# treeW.setColumnCount(1)
# treeW.itemClicked.connect(lambda item: self.execute([Command.SHOW_FOLDER, *item.text(1).split('/')]))
# hierarchy = db.getHierarchy(projID)
# rootItem = treeW.invisibleRootItem()
# count = 0
# for node in PreOrderIter(hierarchy, maxlevel=2):
# if not node.is_root and node.id[0]=='x':
# rootItem.insertChild(count, self.iterateTree(node, projID))
# count += 1
# projectL.addWidget(treeW)
# finalize layout
self.projectsListL.addWidget(projectW)
# Other buttons
Expand Down
48 changes: 27 additions & 21 deletions pasta_eln/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,28 +617,34 @@ def getHierarchy(self, start:str, allItems:bool=False) -> Node:
view = self.getView('viewHierarchy/viewHierarchyAll', startKey=start)
# for item in view:
# print(item)
levelNum = 1
while True:
level = [i for i in view if len(i['key'].split())==levelNum]
if levelNum==1:
if len(level)==1:
value= level[0]['value']
dataTree = Node(id=level[0]['key'], docType=value[1], name=value[2], gui=value[3])
else:
print(f'**ERROR getHierarchy Did not find corresponding level={levelNum} under docID {start}')
dataTree = Node(id=None, name='')
# Reorganize data into lists
childNum = {i['id']:i['value'][0] for i in view}
# ids = [i['id'] for i in view]
keys = [i['key'] for i in view]
values = [i['value'] for i in view]
for k,v in childNum.items():
keys = [i.replace(k,f'{v} {k}') for i in keys]
values = [x for _, x in sorted(zip(keys, values))]
keys = sorted(keys)
dataTree = None
hierarchy = []
for idx, value in enumerate(values):
docType = value[1]
name = value[2]
gui = value[3]
_id = keys[idx].split()[-1]
level = int(len(keys[idx].split())/2)
if idx==0:
dataTree = Node(id=_id, docType=docType, name=name, gui=gui)
hierarchy.append(dataTree)
else:
childList = [i['value'][0] for i in level] #temporary list to allow sorting for child-number
# https://stackoverflow.com/questions/6618515/sorting-list-based-on-values-from-another-list
for node in [x for (_,x) in sorted(zip(childList, level), key=lambda pair: pair[0])]:
parentID = node['key'].split()[-2]
parentNode = find_by_attr(dataTree, parentID, name='id')
value = node['value']
_ = Node(id=node['id'], parent=parentNode, docType=value[1], name=value[2], gui=value[3])
if not level: #if len(level)==0
break
levelNum += 1
# print(RenderTree(dataTree, style=AsciiStyle()))
parentNode = hierarchy[level-2]
subNode = Node(id=_id, parent=parentNode, docType=docType, name=name, gui=gui)
if len(hierarchy)<level:
hierarchy.append('')
elif len(hierarchy)>level:
hierarchy.pop()
hierarchy[-1] = subNode
return dataTree


Expand Down
28 changes: 20 additions & 8 deletions pasta_eln/inputOutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,22 @@
from .miscTools import createDirName, generic_hash

# to discuss
# - genre:docType, simulation, experiment/measurement; status = Done, finished
# - category: project
# - root entry: authors list, single: @id; multiple authors
# - add several authors
# - one creator, multiple authors
# - where to store additional metadata, not in ro-crate-metadata, separate files for each entry?
# "ro-crate-metadata.json", "sdPublisher": "@id": or name
# how to store different versions?
# how should the folder structure be? kadi4mat, sampleDB, does-not-matter:
# ro-crate.json: @type:Comment?
# - https://github.com/TheELNConsortium/TheELNFileFormat/issues/58
# - how to store different versions?
# - history: last version
# - is based on, ro-crate id OR update action
# - how should the folder structure be? kadi4mat, sampleDB, does-not-matter:
# - sampleDB ro-crate.json: @type:comment!
# - ??
# - in "ro-crate-metadata.json" / "sdPublisher": "@id": or name
# - how to verify the import
# - import - export = the same

# Always use RO-crate names
# GENERAL TERMS IN ro-crate-metadata.json (None implies definitely should not be saved)
Expand Down Expand Up @@ -326,8 +337,7 @@ def processPart(part:dict[str,str]) -> int:
#return to home stack and path
backend.cwd = Path(backend.basePath)
backend.hierStack = []
print(f'\n\nGraph in metadatafile\n{tree(graph)}')

print(f'\n\nGraph in metadata file\n{tree(graph)}')
return f'Success: imported {str(addedDocuments)} documents from file {elnFileName} from ELN {elnName} {elnVersion}'


Expand All @@ -336,7 +346,7 @@ def processPart(part:dict[str,str]) -> int:
##########################################
### EXPORT ###
##########################################
def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[]) -> str:
def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=[], verbose:bool=True) -> str:
"""
export eln to file

Expand All @@ -345,6 +355,7 @@ def exportELN(backend:Backend, projectID:str, fileName:str='', dTypes:list[str]=
projectID (str): docId of project
fileName (str): fileName which to use for saving; default='' saves in local folder
dTypes (list): list of strings which should be included in the output, alongside folders x0 & x1; empty list=everything is exported
verbose (bool): verbose

Returns:
str: report of exportation
Expand Down Expand Up @@ -571,7 +582,8 @@ def iterateTree(nodeHier:Node, graph:list[dict[str,Any]]) -> Optional[str]:
#finalize file
index['@graph'] = graphMaster+graph+graphMisc
elnFile.writestr(f'{dirNameProject}/ro-crate-metadata.json', json.dumps(index))
print(json.dumps(index, indent=3))
if verbose:
print(json.dumps(index, indent=3))
# end writing zip file
# temporary json output
# with open(fileName[:-3]+'json','w', encoding='utf-8') as fOut:
Expand Down
6 changes: 5 additions & 1 deletion pasta_eln/mixin_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@ def output(self, docType, printID=False, **kwargs):
outString.append(formatString.format(item['name'].replace('-','')) )
outString = '|'.join(outString)+'\n'
outString += '-'*104+'\n'
for lineItem in self.db.getView(f'viewDocType/{docType}'):
dataList = self.db.getView(f'viewDocType/{docType}')
for lineNum, lineItem in enumerate(dataList):
rowString = []
if lineNum>20:
outString += f'... continued for {len(dataList)} items in total\n'
break
for idx, item in enumerate(i for group in self.db.dataHierarchy[docType]['meta']
for i in self.db.dataHierarchy[docType]['meta'][group]):
width = widthArray[idx] if idx<len(widthArray) else 0
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ skip = '.git,*.pdf,*.svg,*lookup*.json'
ignore-regex = '\[[a-z]\][a-z]+'
#
ignore-words-list = 'speciall,ons,crate,ser'

[tool.pytest.ini_options]
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s|%(levelname)s:%(message)s (%(filename)s:%(lineno)s)"
log_cli_date_format = "%m-%d %H:%M:%S"
Loading