diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cd1bb4c7e..ebb992c1f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,7 +170,7 @@ adios_option(Profiling "Enable support for profiling" AUTO) adios_option(Endian_Reverse "Enable support for Little/Big Endian Interoperability" AUTO) adios_option(Sodium "Enable support for Sodium for encryption" AUTO) adios_option(Catalyst "Enable support for in situ visualization plugin using ParaView Catalyst" AUTO) -adios_option(AWSSDK "Enable support for S3 compatible storage using AWS SDK's S3 module" AUTO) +adios_option(AWSSDK "Enable support for S3 compatible storage using AWS SDK's S3 module" OFF) include(${PROJECT_SOURCE_DIR}/cmake/DetectOptions.cmake) if(ADIOS2_HAVE_CUDA OR ADIOS2_HAVE_Kokkos_CUDA) diff --git a/cmake/DetectOptions.cmake b/cmake/DetectOptions.cmake index 848c8e12dc..3f511e02a8 100644 --- a/cmake/DetectOptions.cmake +++ b/cmake/DetectOptions.cmake @@ -546,9 +546,9 @@ endif() # AWS S3 if(ADIOS2_USE_AWSSDK STREQUAL AUTO) - find_package(AWSSDK QUIET COMPONENTS s3) + find_package(AWSSDK 1.10.15 COMPONENTS s3) elseif(ADIOS2_USE_AWSSDK) - find_package(AWSSDK REQUIRED COMPONENTS s3) + find_package(AWSSDK 1.10.15 REQUIRED COMPONENTS s3) endif() if(AWSSDK_FOUND) set(ADIOS2_HAVE_AWSSDK TRUE) diff --git a/source/utils/bp5dbg/CMakeLists.txt b/source/utils/bp5dbg/CMakeLists.txt index 7cd62825dd..df085bb65f 100644 --- a/source/utils/bp5dbg/CMakeLists.txt +++ b/source/utils/bp5dbg/CMakeLists.txt @@ -5,9 +5,9 @@ install(PROGRAMS bp5dbg.py install( FILES adios2/bp5dbg/__init__.py - adios2/bp5dbg/data.py adios2/bp5dbg/utils.py adios2/bp5dbg/metadata.py + adios2/bp5dbg/metametadata.py adios2/bp5dbg/idxtable.py DESTINATION ${CMAKE_INSTALL_PYTHONDIR}/adios2/bp5dbg COMPONENT adios2_scripts-runtime ) diff --git a/source/utils/bp5dbg/adios2/bp5dbg/__init__.py b/source/utils/bp5dbg/adios2/bp5dbg/__init__.py index eb13414f08..e14ae7d29f 100644 --- a/source/utils/bp5dbg/adios2/bp5dbg/__init__.py +++ b/source/utils/bp5dbg/adios2/bp5dbg/__init__.py @@ -1,3 +1,3 @@ -from .data import * from .idxtable import * from .metadata import * +from .metametadata import * diff --git a/source/utils/bp5dbg/adios2/bp5dbg/data.py b/source/utils/bp5dbg/adios2/bp5dbg/data.py deleted file mode 100644 index 6b77fbd865..0000000000 --- a/source/utils/bp5dbg/adios2/bp5dbg/data.py +++ /dev/null @@ -1,635 +0,0 @@ -import numpy as np -from os import fstat -from .utils import * - - -def ReadEncodedString(f, ID, limit, lensize=2): - if lensize == 2: - # 2 bytes length + string without \0 - namelen = np.fromfile(f, dtype=np.uint16, count=1)[0] - elif lensize == 4: - # 2 bytes length + string without \0 - namelen = np.fromfile(f, dtype=np.uint32, count=1)[0] - else: - print("CODING ERROR: bp5dbp_data.ReadEncodedString: " - "lensize must be 2 or 4") - return False, "" - if namelen > limit: - print("ERROR: " + ID + " string length ({0}) is longer than the " - "limit to stay inside the block ({1})".format( - namelen, limit)) - return False, "" - name = f.read(namelen).decode('ascii') - return True, name - - -def ReadEncodedStringArray(f, ID, limit, nStrings): - s = [] - for i in range(nStrings): - # 2 bytes length + string - # !!! String here INCLUDES Terminating \0 !!! 
- namelen = np.fromfile(f, dtype=np.uint32, count=1)[0] - if namelen > limit - 4: - print("ERROR: " + ID + " string length ({0}) is longer than the " - "limit to stay inside the block ({1})".format( - namelen, limit - 4)) - return False, s - name = f.read(namelen).decode('ascii') - limit = limit - namelen - 4 - s.append(name[0:-1]) # omit the terminating \0 - return True, s - - -def readDataToNumpyArray(f, typeName, nElements): - if typeName == 'byte': - return np.fromfile(f, dtype=np.int8, count=nElements) - elif typeName == 'char': - return np.fromfile(f, dtype=np.uint8, count=nElements) - elif typeName == 'short': - return np.fromfile(f, dtype=np.int16, count=nElements) - elif typeName == 'integer': - return np.fromfile(f, dtype=np.int32, count=nElements) - elif typeName == 'long': - return np.fromfile(f, dtype=np.int64, count=nElements) - - elif typeName == 'unsigned_byte': - return np.fromfile(f, dtype=np.uint8, count=nElements) - elif typeName == 'unsigned_short': - return np.fromfile(f, dtype=np.uint16, count=nElements) - elif typeName == 'unsigned_integer': - return np.fromfile(f, dtype=np.uint32, count=nElements) - elif typeName == 'unsigned_long': - return np.fromfile(f, dtype=np.uint64, count=nElements) - - elif typeName == 'real': - return np.fromfile(f, dtype=np.float32, count=nElements) - elif typeName == 'double': - return np.fromfile(f, dtype=np.float64, count=nElements) - elif typeName == 'long_double': - return np.fromfile(f, dtype=np.float128, count=nElements) - - elif typeName == 'complex': - return np.fromfile(f, dtype=np.complex64, count=nElements) - elif typeName == 'double_complex': - return np.fromfile(f, dtype=np.complex128, count=nElements) - - else: - return np.zeros(1, dtype=np.uint32) - - -def ReadCharacteristicsFromData(f, limit, typeID, ndim): - cStartPosition = f.tell() - dataTypeName = GetTypeName(typeID) - # 1 byte NCharacteristics - nCharacteristics = np.fromfile(f, dtype=np.uint8, count=1)[0] - print(" # of Characteristics : {0}".format(nCharacteristics)) - # 4 bytes length - charLen = np.fromfile(f, dtype=np.uint32, count=1)[0] - print(" Characteristics Length : {0}".format(charLen)) - - for i in range(nCharacteristics): - print(" Characteristics[{0}]".format(i)) - # 1 byte TYPE - cID = np.fromfile(f, dtype=np.uint8, count=1)[0] - cName = GetCharacteristicName(cID) - print(" Type : {0} ({1}) ".format(cName, cID)) - if cName == 'value' or cName == 'min' or cName == 'max': - if dataTypeName == 'string': - namelimit = limit - (f.tell() - cStartPosition) - status, s = ReadEncodedString(f, "String Value", namelimit) - if not status: - return False - print(" Value : '" + s + "'") - else: - data = readDataToNumpyArray(f, dataTypeName, 1) - print(" Value : {0}".format(data[0])) - elif cName == 'offset' or cName == 'payload_offset': - data = readDataToNumpyArray(f, 'unsigned_long', 1) - print(" Value : {0}".format(data[0])) - elif cName == 'time_index' or cName == 'file_index': - data = readDataToNumpyArray(f, 'unsigned_integer', 1) - print(" Value : {0}".format(data[0])) - elif cName == 'minmax': - nBlocks = np.fromfile(f, - dtype=np.uint16, count=1)[0] - print(" nBlocks : {0}".format(nBlocks)) - bminmax = readDataToNumpyArray(f, dataTypeName, 2) - print(" Min/max : {0} / {1}".format( - bminmax[0], bminmax[1])) - if nBlocks > 1: - method = np.fromfile(f, dtype=np.uint8, - count=1)[0] - print(" Division method: {0}".format(method)) - blockSize = np.fromfile(f, dtype=np.uint64, - count=1)[0] - print(" Block size : {0}".format(blockSize)) - div = np.fromfile(f, 
dtype=np.uint16, - count=ndim) - print(" Division vector: (", end="") - for d in range(ndim): - print("{0}".format(div[d]), end="") - if d < ndim - 1: - print(", ", end="") - else: - print(")") - minmax = readDataToNumpyArray( - f, dataTypeName, 2 * nBlocks) - for i in range(nBlocks): - print(" Min/max : {0} / {1}".format( - minmax[2 * i], minmax[2 * i + 1])) - else: - print(" ERROR: could not understand this " - "characteristics type '{0}' id {1}".format(cName, cID)) - return True - -# Read String Variable data - - -def ReadStringVarData(f, expectedSize, - varsStartPosition): - # 2 bytes String Length - len = np.fromfile(f, dtype=np.uint16, count=1)[0] - if len != expectedSize - 2: - print("ERROR: Variable data block size does not equal the size " - "calculated from var block length") - print("Expected size = {0} calculated size " - "from encoded length info {1}". - format(expectedSize, len + 2)) - return False - - str = f.read(len).decode('ascii') - print(" Variable Data : '" + str + "'") - return True - -# Read Variable data - - -def ReadVarData(f, nElements, typeID, ldims, varLen, - varsStartPosition, varsTotalLength): - if typeID == 9: # string type - return ReadStringVarData(f, varLen, varsStartPosition) - typeSize = GetTypeSize(typeID) - if typeSize == 0: - print("ERROR: Cannot process variable data block with " - "unknown type size") - return False - - currentPosition = f.tell() - print(" Payload offset : {0}".format(currentPosition)) - - if currentPosition + varLen > varsStartPosition + varsTotalLength: - print("ERROR: Variable data block of size would reach beyond all " - "variable blocks") - print("VarsStartPosition = {0} varsTotalLength = {1}".format( - varsStartPosition, varsTotalLength)) - print("current Position = {0} var block length = {1}".format( - currentPosition, varLen)) - return False - - nBytes = int(varLen.item()) - - if nElements == 1: - # single value. read and print - value = readDataToNumpyArray(f, GetTypeName(typeID), - nElements) - print(" Payload (value) : {0} ({1} bytes)".format( - value[0], nBytes)) - else: - # seek instead of reading for now - # f.read(nBytes) - f.seek(nBytes, 1) - # data = readDataToNumpyArray(f, GetTypeName(typeID), - # nElements) - print(" Payload (array) : {0} bytes".format(nBytes)) - - return True - -# Read a variable's metadata - - -def ReadVMD(f, varidx, varsStartPosition, varsTotalLength): - startPosition = f.tell() - print(" Var {0:5d}".format(varidx)) - print(" Starting offset : {0}".format(startPosition)) - # 4 bytes TAG - tag = f.read(4) - if tag != b"[VMD": - print(" Tag: " + str(tag)) - print("ERROR: VAR group does not start with [VMD") - return False - print(" Tag : " + tag.decode('ascii')) - - # 8 bytes VMD Length - vmdlen = np.fromfile(f, dtype=np.uint64, count=1)[0] - print(" Var block size : {0} bytes (+4 for Tag)".format(vmdlen)) - expectedVarBlockLength = vmdlen + 4 # [VMD is not included in vmdlen - - if startPosition + expectedVarBlockLength > \ - varsStartPosition + varsTotalLength: - print("ERROR: There is not enough bytes inside this PG to read " - "this Var block") - print("VarsStartPosition = {0} varsTotalLength = {1}".format( - varsStartPosition, varsTotalLength)) - print("current var's start position = {0} var block length = {1}". 
- format(startPosition, expectedVarBlockLength)) - return False - - # 4 bytes VAR MEMBER ID - memberID = np.fromfile(f, dtype=np.uint32, count=1)[0] - print(" Member ID : {0}".format(memberID)) - - # VAR NAME, 2 bytes length + string without \0 - sizeLimit = expectedVarBlockLength - (f.tell() - startPosition) - status, varname = ReadEncodedString(f, "Var Name", sizeLimit) - if not status: - return False - print(" Var Name : " + varname) - - # VAR PATH, 2 bytes length + string without \0 - # sizeLimit = expectedVarBlockLength - (f.tell() - startPosition) - # status, varpath = ReadEncodedString(f, "Var Path", sizeLimit) - # if not status: - # return False - # print(" Var Path : " + varpath) - - # 1 byte ORDER (K, C, F) - order = f.read(1) - if (order != b'K' and order != b'C' and order != b'F' and order != b'\x00'): - print( - "ERROR: Next byte for Order must be 'K', 'C', or 'F' " - "but it isn't = {0}".format(order)) - return False - if (order == b'\x00'): - order = b'0' - print(" Order : " + order.decode('ascii')) - - # 1 byte UNUSED - unused = f.read(1) - print(" Unused byte : {0}".format(ord(unused))) - - # 1 byte TYPE - typeID = np.fromfile(f, dtype=np.uint8, count=1)[0] - print(" Type : {0} ({1}) ".format( - GetTypeName(typeID), typeID)) - - # ISDIMENSIONS 1 byte, 'y' or 'n' - isDimensionVar = f.read(1) - if (isDimensionVar != b'y' and isDimensionVar != b'n'): - print( - "ERROR: Next byte for isDimensionVar must be 'y' or 'n' " - "but it isn't = {0}".format(isDimensionVar)) - return False - print(" isDimensionVar : " + isDimensionVar.decode('ascii')) - - # 1 byte NDIMENSIONS - ndims = np.fromfile(f, dtype=np.uint8, count=1)[0] - print(" # of Dimensions : {0}".format( - ndims)) - - # DIMLENGTH - dimsLen = np.fromfile(f, dtype=np.uint16, count=1)[0] - print(" Dims Length : {0}".format( - dimsLen)) - - nElements = np.uint64(1) - ldims = np.zeros(ndims, dtype=np.uint64) - isLocalValueArray = False - for i in range(ndims): - print(" Dim[{0}]".format(i)) - # Read Local Dimensions (1 byte flag + 8 byte value) - # Is Dimension a variable ID 1 byte, 'y' or 'n' or '\0' - isDimensionVarID = f.read(1) - if isDimensionVarID != b'y' and isDimensionVarID != b'n' and \ - isDimensionVarID != b'\0': - print( - "ERROR: Next byte for isDimensionVarID must be 'y' or 'n' " - "but it isn't = {0}".format(isDimensionVarID)) - return False - if isDimensionVarID == b'\0': - isDimensionVarID = b'n' - ldims[i] = np.fromfile(f, dtype=np.uint64, count=1)[0] - print(" local dim : {0}".format(ldims[i])) - nElements = nElements * ldims[i] - # Read Global Dimensions (1 byte flag + 8 byte value) - # Is Dimension a variable ID 1 byte, 'y' or 'n' or '\0' - isDimensionVarID = f.read(1) - if isDimensionVarID != b'y' and isDimensionVarID != b'n' \ - and isDimensionVarID != b'\0': - print( - "ERROR: Next byte for isDimensionVarID must be 'y' or 'n' " - "but it isn't = {0}".format(isDimensionVarID)) - return False - if isDimensionVarID == b'\0': - isDimensionVarID = b'n' - gdim = np.fromfile(f, dtype=np.uint64, count=1)[0] - if i == 0 and ldims[i] == 0 and gdim == LocalValueDim: - print(" global dim : LocalValueDim ({0})".format(gdim)) - isLocalValueArray = True - else: - print(" global dim : {0}".format(gdim)) - - # Read Offset Dimensions (1 byte flag + 8 byte value) - # Is Dimension a variable ID 1 byte, 'y' or 'n' or '\0' - isDimensionVarID = f.read(1) - if isDimensionVarID != b'y' and isDimensionVarID != b'n' and \ - isDimensionVarID != b'\0': - print( - "ERROR: Next byte for isDimensionVarID must be 'y' or 'n' " - "but it 
isn't = {0}".format(isDimensionVarID)) - return False - if isDimensionVarID == b'\0': - isDimensionVarID = b'n' - offset = np.fromfile(f, dtype=np.uint64, count=1)[0] - print(" offset dim : {0}".format(offset)) - - sizeLimit = expectedVarBlockLength - (f.tell() - startPosition) - status = ReadCharacteristicsFromData(f, sizeLimit, typeID, ndims) - if not status: - return False - - # Padded end TAG - # 1 byte length of tag - endTagLen = np.fromfile(f, dtype=np.uint8, count=1)[0] - tag = f.read(endTagLen) - if not tag.endswith(b"VMD]"): - print(" Tag: " + str(tag)) - print("ERROR: VAR group metadata does not end with VMD]") - return False - print(" Tag (pad {0:2d}) : {1}".format( - endTagLen - 4, tag.decode('ascii'))) - - # special case: LocalValueDim: local values turned into 1D global array - # but it seems there is no data block at all for these variables - if isLocalValueArray: - ldims[0] = 1 - nElements = np.uint64(1) - else: - expectedVarDataSize = expectedVarBlockLength - \ - (f.tell() - startPosition) - status = ReadVarData(f, nElements, typeID, ldims, expectedVarDataSize, - varsStartPosition, varsTotalLength) - if not status: - return False - - return True - -# Read an attribute's metadata and value - - -def ReadAMD(f, attridx, attrsStartPosition, attrsTotalLength): - startPosition = f.tell() - print(" attr {0:5d}".format(attridx)) - print(" Starting offset : {0}".format(startPosition)) - # 4 bytes TAG - tag = f.read(4) - if tag != b"[AMD": - print(" Tag: " + str(tag)) - print("ERROR: ATTR group does not start with [AMD") - return False - print(" Tag : " + tag.decode('ascii')) - - # 8 bytes AMD Length - amdlen = np.fromfile(f, dtype=np.uint32, count=1)[0] - print(" Attr block size : {0} bytes (+4 for Tag)".format(amdlen)) - expectedAttrBlockLength = amdlen + 4 # [AMD is not included in amdlen - if startPosition + expectedAttrBlockLength > \ - attrsStartPosition + attrsTotalLength: - print("ERROR: There is not enough bytes inside this PG " - "to read this Attr block") - print("AttrsStartPosition = {0} attrsTotalLength = {1}".format( - attrsStartPosition, attrsTotalLength)) - print("current attr's start position = {0} " - "attr block length = {1}".format( - startPosition, expectedAttrBlockLength)) - return False - - # 4 bytes ATTR MEMBER ID - memberID = np.fromfile(f, dtype=np.uint32, count=1)[0] - print(" Member ID : {0}".format(memberID)) - - # ATTR NAME, 2 bytes length + string without \0 - sizeLimit = expectedAttrBlockLength - (f.tell() - startPosition) - status, attrname = ReadEncodedString(f, "Attr Name", sizeLimit) - if not status: - return False - print(" Attr Name : " + attrname) - - # ATTR PATH, 2 bytes length + string without \0 - sizeLimit = expectedAttrBlockLength - (f.tell() - startPosition) - status, attrpath = ReadEncodedString(f, "Attr Path", sizeLimit) - if not status: - return False - print(" Attr Path : " + attrpath) - - # isAttrAVar 1 byte, 'y' or 'n' - isAttrAVar = f.read(1) - if isAttrAVar != b'y' and isAttrAVar != b'n': - print( - "ERROR: Next byte for isAttrAVar must be 'y' or 'n' " - "but it isn't = {0}".format(isAttrAVar)) - return False - print(" Refers to Var? 
: " + isAttrAVar.decode('ascii')) - - # 1 byte TYPE - typeID = np.fromfile(f, dtype=np.uint8, count=1)[0] - typeName = GetTypeName(typeID) - print(" Type : {0} ({1}) ".format(typeName, typeID)) - - # Read Attribute data - if typeName == 'string': - sizeLimit = expectedAttrBlockLength - (f.tell() - startPosition) - status, s = ReadEncodedString( - f, "Attribute String Value", sizeLimit, 4) - if not status: - return False - print(" Value : '" + s + "'") - - elif typeName == 'string_array': - nElems = np.fromfile(f, dtype=np.uint32, count=1)[0] - sizeLimit = expectedAttrBlockLength - (f.tell() - startPosition) - status, strList = ReadEncodedStringArray( - f, "Attribute String Array", sizeLimit, nElems) - if not status: - return False - print(" Value : [", end="") - for j in range(len(strList)): - print("'" + strList[j] + "'", end="") - if j < len(strList) - 1: - print(", ", end="") - print("]") - else: - nBytes = np.fromfile(f, dtype=np.uint32, count=1)[0] - typeSize = GetTypeSize(typeID) - nElems = int(nBytes / typeSize) - data = readDataToNumpyArray(f, typeName, nElems) - print(" Value : [", end="") - for j in range(nElems): - print("{0}".format(data[j]), end="") - if j < nElems - 1: - print(", ", end="") - print("]") - - # End TAG AMD] - tag = f.read(4) - if tag != b"AMD]": - print(" Tag: " + str(tag)) - print("ERROR: PG group metadata does not end with AMD]") - return False - print(" Tag : {0}".format(tag.decode('ascii'))) - - return True - -# Read one PG process group (variables and attributes from one process in -# one step) - - -def ReadPG(f, fileSize, pgidx): - pgStartPosition = f.tell() - if pgidx > 0: - print("========================================================") - print("Process Group {0}: ".format(pgidx)) - print(" Starting offset : {0}".format(pgStartPosition)) - tag = f.read(4) - if tag != b"[PGI": - print(" Tag: " + str(tag)) - print("ERROR: PG group does not start with [PGI") - return False - - print(" Tag : " + tag.decode('ascii')) - - # 8 bytes PG Length - pglen = np.fromfile(f, dtype=np.uint64, count=1)[0] - print(" PG length : {0} bytes (+4 for Tag)".format(pglen)) - # pglen does not include the opening tag 4 bytes: - expectedPGLength = pglen + 4 - if pgStartPosition + expectedPGLength > fileSize: - print("ERROR: There is not enough bytes in file to read this PG") - return False - - # ColumnMajor (host language Fortran) 1 byte, 'y' or 'n' - isColumnMajor = f.read(1) - if isColumnMajor != b'y' and isColumnMajor != b'n': - print( - "ERROR: Next byte for isColumnMajor must be 'y' or 'n' " - "but it isn't = {0}".format(isColumnMajor)) - return False - print(" isColumnMajor : " + isColumnMajor.decode('ascii')) - - # PG Name, 2 bytes length + string without \0 - sizeLimit = expectedPGLength - (f.tell() - pgStartPosition) - status, pgname = ReadEncodedString(f, "PG Name", sizeLimit) - if not status: - return False - print(" PG Name : " + pgname) - - # 4 bytes unused (for Coordination variable) - tag = f.read(4) - print(" Unused 4 bytes : " + str(tag)) - - # Timestep name - sizeLimit = expectedPGLength - (f.tell() - pgStartPosition) - status, tsname = ReadEncodedString(f, "Timestep Name", sizeLimit) - if not status: - return False - print(" Step Name : " + tsname) - - # STEP 4 bytes - step = np.fromfile(f, dtype=np.uint32, count=1)[0] - print(" Step Value : {0}".format(step)) - - # Method Count 1 byte1 - nMethods = np.fromfile(f, dtype=np.uint8, count=1)[0] - print(" Methods count : {0}".format(nMethods)) - - # Method Length 2 byte1 - lenMethods = np.fromfile(f, 
dtype=np.uint16, count=1)[0]
-    print("  Methods length  : {0}".format(lenMethods))
-
-    print("  Methods info")
-    for i in range(nMethods):
-        # Method ID
-        methodID = np.fromfile(f, dtype=np.uint8, count=1)[0]
-        print("      Method ID   : {0}".format(methodID))
-        sizeLimit = expectedPGLength - (f.tell() - pgStartPosition)
-        status, methodParams = ReadEncodedString(
-            f, "Method Parameters", sizeLimit)
-        if not status:
-            return False
-        print('      M. params   : "' + methodParams + '"')
-
-    # VARIABLES
-
-    # VARS COUNT 4 bytes
-    nVars = np.fromfile(f, dtype=np.uint32, count=1)[0]
-    print("  # of Variables  : {0}".format(nVars))
-
-    # VARS SIZE 8 bytes
-    varlen = np.fromfile(f, dtype=np.uint64, count=1)[0]
-    print("  Vars length     : {0} bytes".format(varlen))
-    sizeLimit = expectedPGLength - (f.tell() - pgStartPosition)
-    expectedVarsLength = varlen  # need to read this more
-    if expectedVarsLength > sizeLimit:
-        print("ERROR: There is not enough bytes in PG to read the variables")
-        return False
-
-    varsStartPosition = f.tell()
-    for i in range(nVars):
-        # VMD block
-        status = ReadVMD(f, i, varsStartPosition, expectedVarsLength)
-        if not status:
-            return False
-
-    # ATTRIBUTES
-
-    # ATTRS COUNT 4 bytes
-    nAttrs = np.fromfile(f, dtype=np.uint32, count=1)[0]
-    print("  # of Attributes : {0}".format(nAttrs))
-
-    attrsStartPosition = f.tell()
-    # ATTS SIZE 8 bytes
-    # attlen includes the 8 bytes of itself, so remember position before this
-    attlen = np.fromfile(f, dtype=np.uint64, count=1)[0]
-    print("  Attrs length    : {0} bytes".format(attlen))
-    sizeLimit = expectedPGLength - (attrsStartPosition - pgStartPosition) - 4
-    expectedAttrsLength = attlen  # need to read this more before reaching PGI]
-
-    if expectedAttrsLength > sizeLimit:
-        print("ERROR: There is not enough bytes in PG to read the attributes")
-        return False
-
-    attrsStartPosition = f.tell()
-    for i in range(nAttrs):
-        # AMD block
-        status = ReadAMD(f, i, attrsStartPosition, expectedAttrsLength)
-        if not status:
-            return False
-
-    # End TAG PGI]
-    tag = f.read(4)
-    if tag != b"PGI]":
-        print("  Tag: " + str(tag))
-        print("ERROR: PG group metadata does not end with PGI]")
-        return False
-    print("  Tag             : {0}".format(tag.decode('ascii')))
-
-    return True
-
-
-def DumpData(fileName):
-    print("========================================================")
-    print("    Data File: " + fileName)
-    print("========================================================")
-    with open(fileName, "rb") as f:
-        fileSize = fstat(f.fileno()).st_size
-        status = ReadHeader(f, fileSize, "Data")
-        if not status:
-            return status
-        pgidx = 0
-        while (f.tell() < fileSize - 12 and status):
-            status = ReadPG(f, fileSize, pgidx)
-            pgidx = pgidx + 1
-    return status
-
-
-if __name__ == "__main__":
-    print("ERROR: Utility main program is bp5dbg.py")
diff --git a/source/utils/bp5dbg/adios2/bp5dbg/idxtable.py b/source/utils/bp5dbg/adios2/bp5dbg/idxtable.py
index 7d2854e7e8..d554096428 100644
--- a/source/utils/bp5dbg/adios2/bp5dbg/idxtable.py
+++ b/source/utils/bp5dbg/adios2/bp5dbg/idxtable.py
@@ -1,10 +1,24 @@
-from ast import Sub
-import numpy as np
 from os import fstat
-from .utils import *
+
+import numpy as np
+
+from .utils import ReadHeader
+
+# metadata index table (list of dictionaries)
+# step: step (superfluous, since list idx = step)
+# mdpos: pos in metadata file for step
+# mdsize: size of metadata
+# flushcount: flush count
+# writermapindex: entry in WriterMap relevant for this step
 
-def ReadWriterMap(bytearray, pos):
+# WriterMap table (list of dictionaries)
+# StartStep: this entry is valid 
from this step +# WriterCount: number of writers +# AggregatorCount: number of aggregators +# SubfilesCount: number of subfiles + + +def ReadWriterMap(bytearray, pos, verbose): data = np.frombuffer(bytearray, dtype=np.uint64, count=3, offset=pos) WriterCount = int(data[0]) @@ -12,24 +26,25 @@ def ReadWriterMap(bytearray, pos): SubfileCount = int(data[2]) pos = pos + 3 * 8 - print(" WriterMap: Writers = {0} Aggregators = {1} Subfiles = {2}" - .format(WriterCount, AggregatorCount, SubfileCount)) data = np.frombuffer(bytearray, dtype=np.uint64, count=WriterCount, offset=pos) - print(" =====================") - print(" | Rank | Subfile |") - print(" ---------------------") - for r in range(0, WriterCount): - rank = str(r).rjust(7) - sub = str(data[r]).rjust(8) - print(" |" + rank + " | " + sub + " |") - print(" =====================") + if verbose: + print(" WriterMap: Writers = {0} Aggregators = {1} Subfiles = {2}" + .format(WriterCount, AggregatorCount, SubfileCount)) + print(" =====================") + print(" | Rank | Subfile |") + print(" ---------------------") + for r in range(0, WriterCount): + rank = str(r).rjust(7) + sub = str(data[r]).rjust(8) + print(" |" + rank + " | " + sub + " |") + print(" =====================") pos = pos + WriterCount * 8 return pos, WriterCount, AggregatorCount, SubfileCount -def ReadIndex(f, fileSize): +def ReadIndex(f, fileSize, verbose): nBytes = fileSize - f.tell() if nBytes <= 0: return True @@ -37,15 +52,20 @@ def ReadIndex(f, fileSize): WriterCount = 0 pos = 0 step = 0 + MetadataIndexTable = [] + WriterMap = [] + WriterMapIdx = -1 while pos < nBytes: - print("-----------------------------------------------" + - "---------------------------------------------------") + if verbose: + print("-----------------------------------------------" + + "---------------------------------------------------") record = chr(table[pos]) pos = pos + 1 reclen = np.frombuffer(table, dtype=np.uint64, count=1, offset=pos) pos = pos + 8 - print("Record '{0}', length = {1}".format(record, reclen)) + if verbose: + print("Record '{0}', length = {1}".format(record, reclen)) if record == 's': # print("Step record, length = {0}".format(reclen)) data = np.frombuffer(table, dtype=np.uint64, count=3, @@ -56,9 +76,17 @@ def ReadIndex(f, fileSize): flushcount = str(data[2]).ljust(3) FlushCount = data[2] - print("| Step = " + stepstr + "| MetadataPos = " + mdatapos + - " | MetadataSize = " + mdatasize + " | FlushCount = " + - flushcount + "|") + md = {"step": step, + "mdpos": int(data[0]), + "mdsize": int(data[1]), + "flushcount": int(data[2]), + "writermapindex": WriterMapIdx} + MetadataIndexTable.append(md) + + if verbose: + print("| Step = " + stepstr + "| MetadataPos = " + + mdatapos + " | MetadataSize = " + mdatasize + + " | FlushCount = " + flushcount + "|") pos = pos + 3 * 8 @@ -73,44 +101,61 @@ def ReadIndex(f, fileSize): str(thiswriter[i * 2 + 1]) + "; ") start += "loc:" + str(thiswriter[int(FlushCount * 2)]) pos = int(pos + (FlushCount * 2 + 1) * 8) - print(start) + if verbose: + print(start) + step = step + 1 elif record == 'w': # print("WriterMap record") pos, WriterCount, AggregatorCount, SubfileCount = ReadWriterMap( - table, pos) + table, pos, verbose) + wmd = {"StartStep": step, + "WriterCount": WriterCount, + "AggregatorCount": AggregatorCount, + "SubfilesCount": SubfileCount} + WriterMap.append(wmd) + WriterMapIdx = WriterMapIdx + 1 elif record == 'm': - print("MetaMeta record") + if verbose: + print("MetaMeta record") else: - print("Unknown record {0}, lenght = 
{1}".format(record, reclen)) + if verbose: + print("Unknown record {0}, lenght = {1}".format( + record, reclen)) - print("---------------------------------------------------" + - "-----------------------------------------------") + if verbose: + print("---------------------------------------------------" + + "-----------------------------------------------") if fileSize - f.tell() > 1: print("ERROR: There are {0} bytes at the end of file" " that cannot be interpreted".format(fileSize - f.tell() - 1)) - return False + return False, MetadataIndexTable, WriterMap - return True + return True, MetadataIndexTable, WriterMap -def DumpIndexTable(fileName): - print("========================================================") - print(" Index Table File: " + fileName) - print("========================================================") +def DumpIndexTable(fileName, verbose): + if verbose: + print("========================================================") + print(" Index Table File: " + fileName) + print("========================================================") status = False + MetadataIndexTable = [] + WriterMap = [] with open(fileName, "rb") as f: fileSize = fstat(f.fileno()).st_size - status = ReadHeader(f, fileSize, "Index Table") + status = ReadHeader( + f, fileSize, "Index Table", verbose) if isinstance(status, list): status = status[0] if status: - status = ReadIndex(f, fileSize) - return status + status, MetadataIndexTable, WriterMap = ReadIndex( + f, fileSize, verbose) + return status, MetadataIndexTable, WriterMap if __name__ == "__main__": diff --git a/source/utils/bp5dbg/adios2/bp5dbg/metadata.py b/source/utils/bp5dbg/adios2/bp5dbg/metadata.py index e460e41bd0..6fce9ec786 100644 --- a/source/utils/bp5dbg/adios2/bp5dbg/metadata.py +++ b/source/utils/bp5dbg/adios2/bp5dbg/metadata.py @@ -1,570 +1,90 @@ -import numpy as np from os import fstat -from .utils import * - - -def ReadEncodedStringFromBuffer(buf, pos, ID, limit, lenbytes=2): - # 'lenbytes' bytes length + string without \0 - if lenbytes == 1: - dt = np.dtype(np.uint8) - else: - dt = np.dtype(np.uint16) - namelen = np.frombuffer(buf, dtype=dt, count=1, offset=pos)[0] - pos = pos + lenbytes - if namelen > limit - lenbytes: - print("ERROR: " + ID + " string length ({0}) is longer than the " - "limit to stay inside the block ({1})".format( - namelen, limit - lenbytes)) - return False, "", namelen, pos - name = buf[pos:pos + namelen].decode('ascii') - pos = pos + namelen - return True, name, namelen, pos - - -def ReadEncodedStringArrayFromBuffer(buf, pos, ID, limit, nStrings): - s = [] - for i in range(nStrings): - # 2 bytes length + string without \0 - namelen = np.frombuffer(buf, dtype=np.uint16, count=1, offset=pos)[0] - pos = pos + 2 - if namelen > limit - 2: - print("ERROR: " + ID + " string length ({0}) is longer than the " - "limit to stay inside the block ({1})".format( - namelen, limit - 2)) - return False, s, pos - name = buf[pos:pos + namelen].decode('ascii') - pos = pos + namelen - limit = limit - namelen - 2 - s.append(name) - return True, s, pos - - -def ReadDimensionCharacteristics(buf, pos): - ndim = np.frombuffer(buf, dtype=np.uint8, count=1, offset=pos)[0] - pos = pos + 1 - lgo = np.zeros(ndim, dtype=np.uint64) - dimLen = np.frombuffer(buf, dtype=np.uint16, count=1, offset=pos)[0] - pos = pos + 2 - if dimLen != 24 * ndim: - print("ERROR: Encoded dimension length expected size = {0} bytes, " - "but found {1} bytes".format(24 * ndim, dimLen)) - return False, pos, ndim, lgo - - lgo = np.frombuffer(buf, dtype=np.uint64, 
count=3 * ndim, offset=pos) - pos = pos + 24 * ndim - return True, pos, ndim, lgo - - -def bDataToNumpyArray(cData, typeName, nElements, startPos=0): - if typeName == 'byte': - return np.frombuffer(cData, dtype=np.int8, count=nElements, - offset=startPos) - elif typeName == 'char': - return np.frombuffer(cData, dtype=np.uint8, count=nElements, - offset=startPos) - elif typeName == 'short': - return np.frombuffer(cData, dtype=np.int16, count=nElements, - offset=startPos) - elif typeName == 'integer': - return np.frombuffer(cData, dtype=np.int32, count=nElements, - offset=startPos) - elif typeName == 'long': - return np.frombuffer(cData, dtype=np.int64, count=nElements, - offset=startPos) - - elif typeName == 'unsigned_byte': - return np.frombuffer(cData, dtype=np.uint8, count=nElements, - offset=startPos) - elif typeName == 'unsigned_short': - return np.frombuffer(cData, dtype=np.uint16, count=nElements, - offset=startPos) - elif typeName == 'unsigned_integer': - return np.frombuffer(cData, dtype=np.uint32, count=nElements, - offset=startPos) - elif typeName == 'unsigned_long': - return np.frombuffer(cData, dtype=np.uint64, count=nElements, - offset=startPos) - - elif typeName == 'real': - return np.frombuffer(cData, dtype=np.float32, count=nElements, - offset=startPos) - elif typeName == 'double': - return np.frombuffer(cData, dtype=np.float64, count=nElements, - offset=startPos) - elif typeName == 'long_double': - return np.frombuffer(cData, dtype=np.float128, count=nElements, - offset=startPos) - - elif typeName == 'complex': - return np.frombuffer(cData, dtype=np.complex64, count=nElements, - offset=startPos) - elif typeName == 'double_complex': - return np.frombuffer(cData, dtype=np.complex128, count=nElements, - offset=startPos) - - else: - return np.zeros(1, dtype=np.uint32) - - -def ReadCharacteristicsFromMetaData(buf, idx, pos, limit, typeID, - fileOffset, isVarCharacteristics): - cStartPosition = pos - dataTypeName = GetTypeName(typeID) - print(" Block {0}: ".format(idx)) - print(" Starting offset : {0}".format(fileOffset)) - # 1 byte NCharacteristics - nChars = np.frombuffer(buf, dtype=np.uint8, count=1, offset=pos)[0] - pos = pos + 1 - print(" # of Characteristics : {0}".format(nChars)) - # 4 bytes length - charLen = np.frombuffer(buf, dtype=np.uint8, count=32, offset=pos)[0] - pos = pos + 4 - print(" Characteristics Length : {0}".format(charLen)) - # For attributes, we need to remember the dimensions and size - # when reading the value - ndim = 0 - nElems = 1 - - for i in range(nChars): - print(" Characteristics[{0}]".format(i)) - # 1 byte TYPE - cID = np.frombuffer(buf, dtype=np.uint8, count=1, offset=pos)[0] - pos = pos + 1 - cName = GetCharacteristicName(cID) - print(" Type : {0} ({1}) ".format( - cName, cID)) - cLen = GetCharacteristicDataLength(cID, typeID) - - if cName == 'dimensions': - status, pos, ndim, lgo = ReadDimensionCharacteristics(buf, pos) - if not status: - return status, pos - print(" # of Dims : {0}".format(ndim)) - if ndim > 0: - print(" Dims (lgo) : (", end="") - for d in range(ndim): - p = 3 * d - nElems = int(nElems * lgo[p]) # need for value later - print("{0}:{1}:{2}".format(lgo[p], lgo[p + 1], lgo[p + 2]), - end="") - if d < ndim - 1: - print(", ", end="") - else: - print(")") - - elif cName == 'value' or cName == 'min' or cName == 'max': - if dataTypeName == 'string': - namelimit = limit - (pos - cStartPosition) - status, s, sLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "String Value", namelimit) - if not status: - return False, pos - print(" 
Value : '" + s + - "' ({0} bytes)".format(sLen)) - elif dataTypeName == 'string_array': - namelimit = limit - (pos - cStartPosition) - status, strList, pos = ReadEncodedStringArrayFromBuffer( - buf, pos, "String Array", namelimit, lgo[0]) - if not status: - return False, pos - print(" Value : [", end="") - for j in range(len(strList)): - print("'" + strList[j] + "'", end="") - if j < len(strList) - 1: - print(", ", end="") - print("]") - - else: - if isVarCharacteristics: - cData = buf[pos:pos + cLen] - pos = pos + cLen - data = bDataToNumpyArray(cData, dataTypeName, 1) - print(" Value : {0}" - " ({1} bytes)".format(data[0], cLen)) - else: # attribute value characteristics are different - dataTypeSize = GetTypeSize(typeID) - nBytes = int(nElems * dataTypeSize) - cData = buf[pos:pos + nBytes] - pos = pos + nBytes - data = bDataToNumpyArray(cData, dataTypeName, nElems) - print(" Value : [", end="") - for j in range(nElems): - print("{0}".format(data[j]), end="") - if j < nElems - 1: - print(", ", end="") - print("]") - - elif cName == 'offset' or cName == 'payload_offset': - cData = buf[pos:pos + cLen] - pos = pos + cLen - data = bDataToNumpyArray(cData, 'unsigned_long', 1) - print(" Value : {0} ({1} bytes)".format( - data[0], cLen)) - elif cName == 'time_index' or cName == 'file_index': - cData = buf[pos:pos + cLen] - pos = pos + cLen - data = bDataToNumpyArray(cData, 'unsigned_integer', 1) - print(" Value : {0} ({1} bytes)".format( - data[0], cLen)) - elif cName == 'minmax': - nBlocks = np.frombuffer( - buf, dtype=np.uint16, count=1, offset=pos)[0] - print(" nBlocks : {0}".format(nBlocks)) - pos = pos + 2 - bminmax = bDataToNumpyArray(buf, dataTypeName, 2, pos) - pos = pos + 2 * cLen - print(" Min/max : {0} / {1}".format( - bminmax[0], bminmax[1])) - if nBlocks > 1: - method = np.frombuffer(buf, dtype=np.uint8, - count=1, offset=pos)[0] - pos = pos + 1 - print(" Division method: {0}".format(method)) - blockSize = np.frombuffer(buf, dtype=np.uint64, - count=1, offset=pos)[0] - pos = pos + 8 - print(" Block size : {0}".format(blockSize)) - div = np.frombuffer(buf, dtype=np.uint16, - count=ndim, offset=pos) - pos = pos + 2 * ndim - print(" Division vector: (", end="") - for d in range(ndim): - print("{0}".format(div[d]), end="") - if d < ndim - 1: - print(", ", end="") - else: - print(")") - minmax = bDataToNumpyArray(buf, dataTypeName, 2 * nBlocks, pos) - pos = pos + 2 * nBlocks * cLen - for i in range(nBlocks): - print(" Min/max : {0} / {1}".format( - minmax[2 * i], minmax[2 * i + 1])) - elif cName == "transform_type": - # Operator name (8 bit length) - namelimit = limit - (pos - cStartPosition) - status, s, sLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "Operator Name", namelimit, lenbytes=1) - if not status: - return False, pos - print(" Operator : '" + s + - "' ({0} bytes)".format(sLen)) - - # 1 byte TYPE - typeID = buf[pos] - pos = pos + 1 - print(" Pre-type : {0} ({1}) ".format( - GetTypeName(typeID), typeID)) - - # Pre-transform dimenstions - status, pos, ndim, lgo = ReadDimensionCharacteristics(buf, pos) - if not status: - return status, pos - print(" Pre-# of dims : {0}".format(ndim)) - if ndim > 0: - print(" Pre-Dims (lgo) : (", end="") - for d in range(ndim): - p = 3 * d - nElems = int(nElems * lgo[p]) # need for value later - print("{0}:{1}:{2}".format(lgo[p], lgo[p + 1], lgo[p + 2]), - end="") - if d < ndim - 1: - print(", ", end="") - else: - print(")") - - # Operator specific metadata - omdlen = np.frombuffer(buf, dtype=np.uint16, - count=1, offset=pos)[0] - pos = pos + 
2 - print(" Op. data length: {0}".format(omdlen)) - pos = pos + omdlen - else: - print(" ERROR: could not understand this " - "characteristics type '{0}' id {1}".format(cName, cID)) - return True, pos - - -def ReadPGMD(buf, idx, pos, limit, pgStartOffset): - # Read one PG index group - pgStartPosition = pos - print(" PG {0}: ".format(idx)) - print(" Starting offset : {0}".format(pgStartOffset)) - - # 2 bytes PG Length + Name length - pgLength = np.frombuffer(buf, dtype=np.uint16, count=1, offset=pos)[0] - pos = pos + 2 - print( - " PG length : {0} bytes (+2 for length)".format( - pgLength)) - if pgStartPosition + pgLength + 2 > limit: - print("ERROR: There is not enough bytes {0} left in PG index block " - "to read this single PG index ({1} bytes)").format( - limit - pgStartPosition, pgLength) - return False, pos - - pgNameLen = np.frombuffer(buf, dtype=np.uint16, count=1, offset=pos)[0] - pos = pos + 2 - if pgStartPosition + pgNameLen > limit: - print("ERROR: There is not enough bytes {0} left in PG index block " - "to read the name of this single PG index ({1} bytes)").format( - limit - pos + 2, pgNameLen) - return False, pos +import numpy as np - pgName = buf[pos:pos + pgNameLen].decode('ascii') - pos = pos + pgNameLen - print(" PG Name : '" + pgName + - "' ({0} bytes)".format(pgNameLen)) - # ColumnMajor (host language Fortran) 1 byte, 'y' or 'n' - isColumnMajor = buf[pos] # this is an integer value - pos = pos + 1 - if isColumnMajor != ord('y') and isColumnMajor != ord('n'): - print( - "ERROR: Next byte for isColumnMajor must be 'y' or 'n' " - "but it isn't = {0} (={1})".format( - chr(isColumnMajor), isColumnMajor)) +def ReadMetadataStep(f, fileSize, MetadataEntry, WriterMapEntry): + # Read metadata of one step + step = MetadataEntry['step'] + mdpos = MetadataEntry['mdpos'] + mdsize = MetadataEntry['mdsize'] + # flushcount = MetadataEntry['flushcount'] + WriterCount = WriterMapEntry['WriterCount'] + + if mdpos + mdsize > fileSize: + print(f"ERROR: step {step} metadata pos {mdpos} + size {mdsize} " + f"is beyond the metadata file size {fileSize}") return False - print(" isColumnMajor : " + chr(isColumnMajor)) - - processID = np.frombuffer(buf, dtype=np.uint32, count=1, offset=pos)[0] - pos = pos + 4 - print(" process ID : {0}".format(processID)) - - pgTimeNameLen = np.frombuffer(buf, dtype=np.uint16, count=1, offset=pos)[0] - pos = pos + 2 - if pgStartPosition + pgTimeNameLen > limit: - print("ERROR: There is not enough bytes {0} left in PG index block " - "to read the name of this single PG index ({1} bytes)").format( - limit - pos + 2, pgTimeNameLen) - return False, pos - - pgTimeName = buf[pos:pos + pgTimeNameLen].decode('ascii') - pos = pos + pgTimeNameLen - print(" Timestep Name : '" + pgTimeName + - "' ({0} bytes)".format(pgTimeNameLen)) - - step = np.frombuffer(buf, dtype=np.uint32, count=1, offset=pos)[0] - pos = pos + 4 - print(" Step : {0}".format(step)) - ptr = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos)[0] - pos = pos + 8 - print(" Offset in data : {0}".format(ptr)) - - return True, pos - - -def ReadVarMD(buf, idx, pos, limit, varStartOffset): - # Read one VAR index group - varStartPosition = pos - print(" Var {0}: ".format(idx)) - print(" Starting offset : {0}".format(varStartOffset)) - - # 4 bytes VAR Index Length - varLength = np.frombuffer(buf, dtype=np.uint32, count=1, offset=pos)[0] - pos = pos + 4 - print(" Var idx length : {0} bytes (+4 for idx length)".format( - varLength)) - if varStartPosition + varLength + 4 > limit: - print("ERROR: There is not 
enough bytes in Var index block " - "to read this single Var index") - return False, pos - - memberID = np.frombuffer(buf, dtype=np.uint32, count=1, offset=pos)[0] - pos = pos + 4 - print(" MemberID : {0}".format(memberID)) - - namelimit = limit - (pos - varStartPosition) - status, grpName, grpNameLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "Group Name", namelimit) - if not status: - return False, pos - print(" Group Name : '" + grpName + - "' ({0} bytes)".format(grpNameLen)) - - namelimit = limit - (pos - varStartPosition) - status, varName, varNameLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "Var Name", namelimit) - if not status: - return False, pos - print(" Var Name : '" + varName + - "' ({0} bytes)".format(varNameLen)) - - # print(" Current offset : {0}".format(varStartOffset + pos)) - # namelimit = limit - (pos - varStartPosition) - # status, varPath, varPathLen, pos = ReadEncodedStringFromBuffer( - # buf, pos, "Var Path", namelimit) - # if not status: - # return False, pos - # print(" Var Path : '" + varPath + - # "' ({0} bytes)".format(varPathLen)) + currentpos = f.tell() + if mdpos > currentpos: + print(f"Offset {currentpos}..{mdpos-1} is a gap unaccounted for") - # 1 byte ORDER (K, C, F) - order = buf[pos] # this is an integer value - pos = pos + 1 - if order != ord('K') and order != ord('C') and order != ord('F'): - print( - "ERROR: Next byte for Order must be 'K', 'C', or 'F' " - "but it isn't = {0} (={1})".format( - chr(order), order)) + if mdpos < currentpos: + print(f"ERROR: step {step} metadata pos {mdpos} points before the " + f"expected position in file {currentpos}") return False - print(" Order : " + chr(order)) - - # 1 byte UNUSED - unused = buf[pos] # this is an integer value - pos = pos + 1 - print(" Unused byte : {0}".format(unused)) - - # 1 byte TYPE - typeID = buf[pos] - pos = pos + 1 - print(" Type : {0} ({1}) ".format( - GetTypeName(typeID), typeID)) - - # 8 byte Number of Characteristics Sets - cSets = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos)[0] - pos = pos + 8 - print(" # of blocks : {0}".format(cSets)) - -# This loop only reads the number of reported blocks -# for i in range(cSets): -# # one characteristics block -# newlimit = limit - (pos - varStartPosition) -# fileOffset = varStartOffset + (pos - varStartPosition) -# status, pos = ReadCharacteristicsFromMetaData( -# buf, i, pos, newlimit, typeID, fileOffset, True) -# if not status: -# return False - -# This loop reads blocks until the reported length of variable index length - i = 0 - while pos < varStartPosition + varLength: - # one characteristics block - newlimit = limit - (pos - varStartPosition) - fileOffset = varStartOffset + (pos - varStartPosition) - status, pos = ReadCharacteristicsFromMetaData( - buf, i, pos, newlimit, typeID, fileOffset, True) - if not status: - return False - i = i + 1 - - if (i != cSets): - print( - "ERROR: reported # of blocks (={0}) != # of encoded blocks " - " (={1})".format( - cSets, i)) - - return True, pos - -def ReadAttrMD(buf, idx, pos, limit, attrStartOffset): - # Read one ATTR index group - attrStartPosition = pos - print(" Attr {0}: ".format(idx)) - print(" Starting offset : {0}".format(attrStartOffset)) + f.seek(mdpos) + buf = f.read(mdsize) + pos = 0 - # 4 bytes ATTR Index Length - attrLength = np.frombuffer(buf, dtype=np.uint32, count=1, offset=pos)[0] - pos = pos + 4 - print(" Attr idx length : {0} bytes (+4 for idx length)".format( - attrLength)) - if attrStartPosition + attrLength + 4 > limit: - print("ERROR: There is not enough 
bytes in Attr index block " - "to read this single Attr index") - return False, pos - - memberID = np.frombuffer(buf, dtype=np.uint32, count=1, offset=pos)[0] - pos = pos + 4 - print(" MemberID : {0}".format(memberID)) - - namelimit = limit - (pos - attrStartPosition) - status, grpName, grpNameLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "Group Name", namelimit) - if not status: - return False, pos - print(" Group Name : '" + grpName + - "' ({0} bytes)".format(grpNameLen)) - - namelimit = limit - (pos - attrStartPosition) - status, attrName, attrNameLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "Attr Name", namelimit) - if not status: - return False, pos - print(" Attr Name : '" + attrName + - "' ({0} bytes)".format(attrNameLen)) - - # print(" Current offset : {0}".format(attrStartOffset + pos)) - namelimit = limit - (pos - attrStartPosition) - status, attrPath, attrPathLen, pos = ReadEncodedStringFromBuffer( - buf, pos, "Attr Path", namelimit) - if not status: - return False, pos - print(" Attr Path : '" + attrPath + - "' ({0} bytes)".format(attrPathLen)) - - # 1 byte TYPE - typeID = buf[pos] - pos = pos + 1 - print(" Type : {0} ({1}) ".format( - GetTypeName(typeID), typeID)) - - # 8 byte Number of Characteristics Sets - cSets = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos)[0] - pos = pos + 8 - print(" # of blocks : {0}".format(cSets)) - - for i in range(cSets): - # one characteristics block - newlimit = limit - (pos - attrStartPosition) - fileOffset = attrStartOffset + (pos - attrStartPosition) - status, pos = ReadCharacteristicsFromMetaData( - buf, i, pos, newlimit, typeID, fileOffset, False) - if not status: - return False - - return True, pos - - -def ReadMetadataStep(f, fileSize, step, WriterCount): - # Read metadata of one step - mdStartPosition = f.tell() if step > 0: print("========================================================") - print("Step {0}: ".format(step)) - print(" PG Index offset : {0}".format(mdStartPosition)) - - # Read the PG Index - - # 8 bytes PG Count + Index Length - totalsize = np.fromfile(f, dtype=np.uint64, count=1) - print(" TotalMetadata size : {0}".format(totalsize)) - metadatasizearray = np.fromfile(f, dtype=np.uint64, count=WriterCount) - attrsizearray = np.fromfile(f, dtype=np.uint64, count=WriterCount) + print(f"Step {step}: ") + print(f" Offset = {mdpos}") - for i in range(0, WriterCount): - print(" Writer " + str(i) + ": MDsize=" + str(metadatasizearray[i]) + - ", AttrSize=" + str(attrsizearray)) - f.read(metadatasizearray[i]) - f.read(attrsizearray[i]) + mdsize_in_file = np.frombuffer(buf, dtype=np.uint64, count=1, + offset=pos) + pos = pos + 8 + if (mdsize == mdsize_in_file[0] + 8): + print(f" Size = {mdsize_in_file[0]}") + else: + print(f"ERROR: md record supposed to be {mdsize-8} + 8 bytes " + f"(as recorded in index), but found in file " + f"{mdsize_in_file[0]}") + + MDPosition = mdpos + 2 * 8 * WriterCount + print(" Variable metadata entries: ") + for w in range(0, WriterCount): + a = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos) + thisMDSize = int(a[0]) + pos = pos + 8 + print(f" Writer {w}: md size {thisMDSize} " + f"offset {MDPosition}") + MDPosition = MDPosition + thisMDSize + + print(" Attribute metadata entries: ") + for w in range(0, WriterCount): + a = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos) + thisMDSize = int(a[0]) + pos = pos + 8 + print(f" Writer {w}: md size {thisMDSize} " + f"offset {MDPosition}") + MDPosition = MDPosition + thisMDSize + + if (mdsize_in_file != MDPosition - 
mdpos): + print(f"ERROR: entries supposed to end at start offset+size " + f"{mdpos}+{mdsize_in_file[0]}, but it ends instead on offset " + f"{MDPosition}") return True -def DumpMetaData(fileName): +def DumpMetaData(fileName, MetadataIndexTable, WriterMap): print("========================================================") print(" Metadata File: " + fileName) print("========================================================") + + # print(f"MetadataIndexTable={MetadataIndexTable}") + # print(f"WriterMap={WriterMap}") + with open(fileName, "rb") as f: fileSize = fstat(f.fileno()).st_size - status = ReadHeader(f, fileSize, "Metadata") - if isinstance(status, list): - WriterCount = status[1] - status = status[0] - step = 0 - while (f.tell() < fileSize - 12 and status): - status = ReadMetadataStep(f, fileSize, step, WriterCount) - step = step + 1 + for MetadataEntry in MetadataIndexTable: + WriterMapEntry = WriterMap[MetadataEntry['writermapindex']] + status = ReadMetadataStep( + f, fileSize, MetadataEntry, WriterMapEntry) return status diff --git a/source/utils/bp5dbg/adios2/bp5dbg/metametadata.py b/source/utils/bp5dbg/adios2/bp5dbg/metametadata.py new file mode 100644 index 0000000000..3b908e29e2 --- /dev/null +++ b/source/utils/bp5dbg/adios2/bp5dbg/metametadata.py @@ -0,0 +1,51 @@ +from os import fstat + +import numpy as np + + +def ReadMetaMetadataRecord(buf, rec, pos, fileSize): + # Read one metametadata record + startoffset = str(pos).rjust(8) + # 8 bytes MetaMetaIDLen + MetaMetaInfoLen Length + mmIDlen = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos) + pos = pos + 8 + mmInfolen = np.frombuffer(buf, dtype=np.uint64, count=1, offset=pos) + pos = pos + 8 + # mmid = np.frombuffer(buf, dtype=np.uint8, count=mmIDlen[0], offset=pos) + pos = pos + int(mmIDlen[0]) + # mminfo = np.frombuffer(buf, dtype=np.uint8, + # count=mmInfolen[0], offset=pos) + pos = pos + int(mmInfolen[0]) + + recs = str(rec).rjust(7) + idlen = str(mmIDlen[0]).rjust(10) + infolen = str(mmInfolen[0]).rjust(12) + + print( + f" | {recs} | {startoffset} | {idlen} | {infolen} |") + + return pos + + +def DumpMetaMetaData(fileName): + print("========================================================") + print(" MetaMetadata File: " + fileName) + print("========================================================") + with open(fileName, "rb") as f: + fileSize = fstat(f.fileno()).st_size + print(f" File size = {fileSize}") + buf = f.read(fileSize) + print(" --------------------------------------------------") + print(" | Record | Offset | ID length | Info length |") + print(" --------------------------------------------------") + pos = 0 + rec = 0 + while (pos < fileSize - 1): + pos = ReadMetaMetadataRecord(buf, rec, pos, fileSize) + rec = rec + 1 + print(" --------------------------------------------------") + return True + + +if __name__ == "__main__": + print("ERROR: Utility main program is bp5dbg.py") diff --git a/source/utils/bp5dbg/adios2/bp5dbg/utils.py b/source/utils/bp5dbg/adios2/bp5dbg/utils.py index 22dbe45e6b..87ca3724cd 100644 --- a/source/utils/bp5dbg/adios2/bp5dbg/utils.py +++ b/source/utils/bp5dbg/adios2/bp5dbg/utils.py @@ -103,7 +103,7 @@ def GetCharacteristicDataLength(cID, typeID): # Read Header info 64 bytes # fileType: Data, Metadata, Index Table -def ReadHeader(f, fileSize, fileType): +def ReadHeader(f, fileSize, fileType, verbose): status = True if fileSize < 64: print("ERROR: Invalid " + fileType + ". 
File is smaller "
@@ -143,6 +143,9 @@ def ReadHeader(f, fileSize, fileType):
         clmnStr = ' yes'
     # 45..63 unused
 
+    if not verbose:
+        return status
+
     print("---------------------------------------------------------"
           "---------------------------------------------------------")
     print("| Version string |Major|Minor|Patch"
diff --git a/source/utils/bp5dbg/bp5dbg.py b/source/utils/bp5dbg/bp5dbg.py
index 5b797dbac0..9ec21342ba 100755
--- a/source/utils/bp5dbg/bp5dbg.py
+++ b/source/utils/bp5dbg/bp5dbg.py
@@ -1,11 +1,14 @@
 #!/usr/bin/env python3
 import argparse
-from os.path import basename, exists, isdir
 import glob
-from adios2.bp5dbg import *
+from os.path import basename, exists, isdir
+
+from adios2.bp5dbg import DumpIndexTable, DumpMetaData, DumpMetaMetaData
 
-WriterCount = -1
+MetadataIndexTable = []
+WriterMap = []
+status = True
 
 
 def SetupArgs():
@@ -21,7 +24,11 @@ def SetupArgs():
                         help="Do not print index table md.idx",
                         action="store_true")
     parser.add_argument("--no-metadata", "-m",
-                        help="Do not print metadata md.0", action="store_true")
+                        help="Do not print metadata md.0",
+                        action="store_true")
+    parser.add_argument("--no-metametadata", "-M",
+                        help="Do not print meta-metadata mmd.0",
+                        action="store_true")
     parser.add_argument("--no-data", "-d",
                         help="Do not print data data.*", action="store_true")
     args = parser.parse_args()
@@ -29,6 +36,8 @@ def SetupArgs():
     # default values
     args.idxFileName = ""
     args.dumpIdx = False
+    args.metametadataFileName = ""
+    args.dumpMetaMetadata = False
     args.metadataFileName = ""
     args.dumpMetadata = False
     args.dataFileName = ""
@@ -49,6 +58,9 @@ def CheckFileName(args):
         if not args.no_metadata:
             args.metadataFileName = args.FILE + "/" + "md.[0-9]*"
             args.dumpMetadata = True
+        if not args.no_metametadata:
+            args.metametadataFileName = args.FILE + "/" + "mmd.[0-9]*"
+            args.dumpMetaMetadata = True
         if not args.no_data:
             args.dataFileName = args.FILE + "/" + "data.[0-9]*"
             args.dumpData = True
@@ -67,22 +79,62 @@ def CheckFileName(args):
             args.metadataFileName = args.FILE
             args.dumpMetadata = True
 
+    elif name.startswith("mmd."):
+        args.metametadataFileName = args.FILE
+        args.dumpMetaMetadata = True
+
 
 def DumpIndexTableFile(args):
+    global MetadataIndexTable
+    global WriterMap
+    global status
     indexFileList = glob.glob(args.idxFileName)
     if len(indexFileList) > 0:
-        DumpIndexTable(indexFileList[0])
+        status, MetadataIndexTable, WriterMap = DumpIndexTable(
+            indexFileList[0], True)
     else:
         print("There is no BP% Index Table file as " + args.idxFileName)
+        status = False
+    return status
 
 
-# def DumpMetadataFiles(args):
-#     mdFileList = glob.glob(args.metadataFileName)
-#     if len(mdFileList) > 0:
-#         for fname in mdFileList:
-#             DumpMetaData(fname)
-#     else:
-#         print("There are no BP% Metadata files in " + args.metadataFileName)
+def DumpMetaMetadataFiles(args):
+    global status
+    mdFileList = glob.glob(args.metametadataFileName)
+    if len(mdFileList) > 0:
+        for fname in mdFileList:
+            status = DumpMetaMetaData(fname)
+    else:
+        print("There are no BP5 MetaMetadata files in " +
+              args.metametadataFileName)
+        status = False
+    return status
+
+
+# xxx/md.X to xxx/md.idx
+def GetIndexFileName(MDFileName):
+    return MDFileName.rsplit('.', 1)[0] + ".idx"
+
+
+def DumpMetadataFiles(args):
+    global MetadataIndexTable
+    global WriterMap
+    global status
+    mdFileList = glob.glob(args.metadataFileName)
+    if len(mdFileList) > 0:
+        if len(MetadataIndexTable) == 0:
+            # need to parse index first
+            IndexFileName = GetIndexFileName(mdFileList[0])
+            status, MetadataIndexTable, WriterMap = DumpIndexTable(
+                IndexFileName, False)
+
+        if status:
+            for fname in mdFileList:
+                DumpMetaData(fname, MetadataIndexTable, WriterMap)
+    else:
+        print("There are no BP5 Metadata files in " + args.metadataFileName)
+        status = False
+    return status
 
 # def DumpDataFiles(args):
 #     dataFileList = glob.glob(args.dataFileName)
@@ -93,8 +145,6 @@ def DumpIndexTableFile(args):
 #         print("There are no BP5 Data files in " + args.dataFileName)
 
 
-WriterCount = -1
-
 if __name__ == "__main__":
 
     args = SetupArgs()
@@ -102,10 +152,13 @@ def DumpIndexTableFile(args):
     # print(args)
 
     if args.dumpIdx:
-        DumpIndexTableFile(args)
+        status = DumpIndexTableFile(args)
+
+    if args.dumpMetaMetadata and status:
+        status = DumpMetaMetadataFiles(args)
 
-#    if args.dumpMetadata:
-#        DumpMetadataFiles(args)
+    if args.dumpMetadata and status:
+        status = DumpMetadataFiles(args)
 
 #    if args.dumpData:
 #        DumpDataFiles(args)
diff --git a/source/utils/bpls/bpls.cpp b/source/utils/bpls/bpls.cpp
index 462598d90a..c696690fa0 100644
--- a/source/utils/bpls/bpls.cpp
+++ b/source/utils/bpls/bpls.cpp
@@ -1252,8 +1252,9 @@ int printVariableInfo(core::Engine *fp, core::IO *io, core::Variable *variabl
         if (longopt && !timestep)
         {
             fprintf(outf, " = ");
-            auto mm = variable->MinMax();
-            print_data(&mm.second, 0, adiosvartype, false);
+            T value;
+            fp->Get(*variable, value, adios2::Mode::Sync);
+            print_data(&value, 0, adiosvartype, false);
         }
         fprintf(outf, "\n");
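
Usage note: with this refactoring, the bp5dbg modules can be driven
programmatically as well as through bp5dbg.py. A minimal sketch of the new
call sequence (the directory name out.bp below is an illustrative
placeholder; only functions added or changed in this diff are used):

    from adios2.bp5dbg import DumpIndexTable, DumpMetaData, DumpMetaMetaData

    # Parse md.idx quietly (verbose=False); besides the status flag, this
    # now returns the metadata index table and the WriterMap that
    # DumpMetaData needs instead of the old global WriterCount.
    status, MetadataIndexTable, WriterMap = DumpIndexTable(
        "out.bp/md.idx", False)

    if status:
        # Each step's metadata record is located through its index entry and
        # the WriterMap entry that was in effect when the step was written.
        DumpMetaData("out.bp/md.0", MetadataIndexTable, WriterMap)
        DumpMetaMetaData("out.bp/mmd.0")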