diff --git a/.gitignore b/.gitignore index d065f5b7..5a61c7f0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,11 @@ +tags Makefile Makefile.in aclocal.m4 config.log config.status +COPYING +INSTALL config/compile config/config.guess config/config.sub @@ -12,16 +15,20 @@ config/missing config/test-driver configure contrib/.deps/ +contrib/cbif contrib/Makefile contrib/Makefile.in +contrib/cbif doc/Makefile doc/Makefile.in src/.deps/ +src/mdtest src/Makefile src/Makefile.in src/config.h src/config.h.in src/stamp-h1 +*[-.]mod *~ NOTES.txt autom4te.cache @@ -29,7 +36,14 @@ contrib/cbif.o src/*.o src/*.i src/*.s +src/*.a src/ior +src/mdtest +src/testlib +src/test/.deps/ +src/test/.dirstamp +src/test/lib.o +build/ doc/doxygen/build doc/sphinx/_*/ diff --git a/META b/META index a4ea8733..782f8466 100755 --- a/META +++ b/META @@ -1,3 +1,3 @@ Package: ior -Version: 3.2.0 +Version: 3.3.0+dev Release: 0 diff --git a/NEWS b/NEWS index 00cbf439..93671124 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,19 @@ +Version 3.3.0+dev +-------------------------------------------------------------------------------- + +New major features: + +New minor features: + +Bugfixes: + +Version 3.2.1 +-------------------------------------------------------------------------------- + +- Fixed a memory protection bug in mdtest (Julian Kunkel) +- Fixed correctness bugs in mdtest leaf-only mode (#147) (rmn1) +- Fixed bug where mdtest attempted to stat uncreated files (Julian Kunkel) + Version 3.2.0 -------------------------------------------------------------------------------- @@ -104,7 +120,7 @@ Version 2.10.1 - Corrected IOR_GetFileSize() function to point to HDF5 and NCMPI versions of IOR_GetFileSize() calls - Changed the netcdf dataset from 1D array to 4D array, where the 4 dimensions - are: [segmentCount][numTasksWorld][numTransfers][transferSize] + are: [segmentCount][numTasks][numTransfers][transferSize] This patch from Wei-keng Liao allows for file sizes > 4GB (provided no single dimension is > 
4GB). - Finalized random-capability release diff --git a/README.md b/README.md index 81a1fa32..c1c73a8a 100755 --- a/README.md +++ b/README.md @@ -1,31 +1,31 @@ # HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=master)](https://travis-ci.org/hpc/ior) -This repo now contains both IOR and mdtest. -See also NOTES.txt +This repository contains the IOR and mdtest parallel I/O benchmarks. The +[official IOR/mdtest documention][] can be found in the `docs/` subdirectory or +on Read the Docs. -# Building +## Building -0. If "configure" is missing from the top level directory, you - probably retrieved this code directly from the repository. - Run "./bootstrap". +1. If `configure` is missing from the top level directory, you probably + retrieved this code directly from the repository. Run `./bootstrap` + to generate the configure script. Alternatively, download an + [official IOR release][] which includes the configure script. - If your versions of the autotools are not new enough to run - this script, download and official tarball in which the - configure script is already provided. +1. Run `./configure`. For a full list of configuration options, use + `./configure --help`. -1. Run "./configure" +2. Run `make` - See "./configure --help" for configuration options. +3. Optionally, run `make install`. The installation prefix + can be changed via `./configure --prefix=...`. -2. Run "make" +## Testing -3. Optionally, run "make install". The installation prefix - can be changed as an option to the "configure" script. +* Run `make check` to invoke the unit tests. +* More comprehensive functionality tests are included in `testing/`. These + scripts will launch IOR and mdtest via MPI. +* Docker scripts are also provided in `testing/docker/` to test various + distributions at once. -# Testing - - Run "make check" to invoke the unit test framework of Automake. 
- - * To run basic functionality tests that we use for continuous integration, see ./testing/ - * There are docker scripts provided to test various distributions at once. - * See ./testing/docker/ +[official IOR release]: https://github.com/hpc/ior/releases +[official IOR/mdtest documention]: http://ior.readthedocs.org/ diff --git a/README_DAOS b/README_DAOS new file mode 100644 index 00000000..ed98bd66 --- /dev/null +++ b/README_DAOS @@ -0,0 +1,86 @@ +Building +---------------------- + +The DAOS library must be installed on the system. + +./bootstrap +./configure --prefix=iorInstallDir --with-daos=DIR --with-cart=DIR + +One must specify "--with-daos=/path/to/daos/install and --with-cart". When that +is specified the DAOS and DFS driver will be built. + +The DAOS driver uses the DAOS API to open a container (or create it if it +doesn't exist first) then create an array object in that container (file) and +read/write to the array object using the daos Array API. The DAOS driver works +with IOR only (no mdtest support yet). The file name used by IOR (passed by -o +option) is hashed to an object ID that is used as the array oid. + +The DFS (DAOS File System) driver creates an encapsulated namespace and emulates +the POSIX driver using the DFS API directly on top of DAOS. The DFS driver works +with both IOR and mdtest. + +Running with DAOS API +--------------------- + +ior -a DAOS [ior_options] [daos_options] + +In the IOR options, the file name should be specified as a container uuid using +"-o ". If the "-E" option is given, then this UUID shall denote +an existing container created by a "matching" IOR run. Otherwise, IOR will +create a new container with this UUID. In the latter case, one may use +uuidgen(1) to generate the UUID of the new container. 
+ +The DAOS options include: + +Required Options: +--daos.pool : pool uuid to connect to (has to be created beforehand) +--daos.svcl : pool svcl list (: separated) +--daos.cont : container for the IOR files/objects (can use `uuidgen`) + +Optional Options: +--daos.group : group name of servers with the pool +--daos.chunk_size : Chunk size of the array object controlling striping over DKEYs +--daos.destroy flag to destroy the container on finalize +--daos.oclass : specific object class for array object + +Examples that should work include: + + - "ior -a DAOS -w -W -o file_name --daos.pool --daos.svcl \ + --daos.cont " + + - "ior -a DAOS -w -W -r -R -o file_name -b 1g -t 4m \ + --daos.pool --daos.svcl --daos.cont \ + --daos.chunk_size 1024 --daos.oclass R2" + +Running with DFS API +--------------------- + +ior -a DFS [ior_options] [dfs_options] +mdtest -a DFS [mdtest_options] [dfs_options] + +Required Options: +--dfs.pool : pool uuid to connect to (has to be created beforehand) +--dfs.svcl : pool svcl list (: separated) +--dfs.cont : container uuid that will hold the encapsulated namespace + +Optional Options: +--dfs.group : group name of servers with the pool +--dfs.chunk_size : Chunk size of the files +--dfs.destroy flag to destroy the container on finalize +--dfs.oclass : specific object class for files + +In the IOR options, the file name should be specified on the root dir directly +since ior does not create directories and the DFS container representing the +encapsulated namespace is not the same as the system namespace the user is +executing from. + +Examples that should work include: + - "ior -a DFS -w -W -o /test1 --dfs.pool --dfs.svcl --dfs.cont " + - "ior -a DFS -w -W -r -R -o /test2 -b 1g -t 4m -C --dfs.pool --dfs.svcl --dfs.cont " + - "ior -a DFS -w -r -o /test3 -b 8g -t 1m -C --dfs.pool --dfs.svcl --dfs.cont " + +Running mdtest, the user needs to specify a directory with -d where the test +tree will be created. 
Some examples: + - "mdtest -a DFS -n 100 -F -D -d /bla --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -n 1000 -F -C -d /bla --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d /bla --dfs.pool --dfs.svcl --dfs.cont " diff --git a/configure.ac b/configure.ac index a372d845..cd71b396 100755 --- a/configure.ac +++ b/configure.ac @@ -19,6 +19,17 @@ AM_INIT_AUTOMAKE([check-news dist-bzip2 gnu no-define foreign subdir-objects]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AM_MAINTAINER_MODE +# Check for system-specific stuff +case "${host_os}" in + *linux*) + ;; + *darwin*) + CPPFLAGS="${CPPFLAGS} -D_DARWIN_C_SOURCE" + ;; + *) + ;; +esac + # Checks for programs # We can't do anything without a working MPI @@ -39,7 +50,7 @@ AC_CHECK_HEADERS([fcntl.h libintl.h stdlib.h string.h strings.h sys/ioctl.h sys/ AC_TYPE_SIZE_T # Checks for library functions. -AC_CHECK_FUNCS([getpagesize gettimeofday memset mkdir pow putenv realpath regcomp sqrt strcasecmp strchr strerror strncasecmp strstr uname statfs statvfs]) +AC_CHECK_FUNCS([sysconf gettimeofday memset mkdir pow putenv realpath regcomp sqrt strcasecmp strchr strerror strncasecmp strstr uname statfs statvfs]) AC_SEARCH_LIBS([sqrt], [m], [], [AC_MSG_ERROR([Math library not found])]) @@ -65,12 +76,14 @@ AS_IF([test "$ac_cv_header_gpfs_h" = "yes" -o "$ac_cv_header_gpfs_fcntl_h" = "ye # Check for system capabilities AC_SYS_LARGEFILE +AC_DEFINE([_XOPEN_SOURCE], [700], [C99 compatibility]) + # Check for lustre availability AC_ARG_WITH([lustre], [AS_HELP_STRING([--with-lustre], [support configurable Lustre striping values @<:@default=check@:>@])], [], [with_lustre=check]) -AS_IF([test "x$with_lustre" != xno], [ +AS_IF([test "x$with_lustre" = xyes ], [ AC_CHECK_HEADERS([linux/lustre/lustre_user.h lustre/lustre_user.h], break, [ if test "x$with_lustre" != xcheck -a \ "x$ac_cv_header_linux_lustre_lustre_user_h" = "xno" -a \ @@ -78,6 +91,18 @@ AS_IF([test "x$with_lustre" != xno], [ 
AC_MSG_FAILURE([--with-lustre was given, not found]) fi ]) + AC_CHECK_HEADERS([linux/lustre/lustreapi.h lustre/lustreapi.h], + [AC_DEFINE([HAVE_LUSTRE_LUSTREAPI], [], [Lustre user API available in some shape or form])], [ + if test "x$with_lustre" != xcheck -a \ + "x$ac_cv_header_linux_lustre_lustreapi_h" = "xno" -a \ + "x$ac_cv_header_lustre_lustreapi_h" = "xno" ; then + AC_MSG_FAILURE([--with-lustre was given, not found]) + fi + ]) +]) +AM_CONDITIONAL([WITH_LUSTRE], [test x$with_lustre = xyes]) +AM_COND_IF([WITH_LUSTRE],[ + AC_DEFINE([WITH_LUSTRE], [], [Build wth LUSTRE backend]) ]) # IME (DDN's Infinite Memory Engine) support @@ -172,7 +197,75 @@ AM_COND_IF([USE_RADOS_AIORI],[ AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI]) ]) +# CEPHFS support +AC_ARG_WITH([cephfs], + [AS_HELP_STRING([--with-cephfs], + [support IO with libcephfs backend @<:@default=no@:>@])], + [], + [with_cephfs=no]) +AS_IF([test "x$with_cephfs" != xno], [ + CPPFLAGS="$CPPFLAGS -D_FILE_OFFSET_BITS=64 -std=gnu11" +]) +AM_CONDITIONAL([USE_CEPHFS_AIORI], [test x$with_cephfs = xyes]) +AM_COND_IF([USE_CEPHFS_AIORI],[ + AC_DEFINE([USE_CEPHFS_AIORI], [], [Build CEPHFS backend AIORI]) +]) +# DAOS Backends (DAOS and DFS) IO support require DAOS and CART/GURT +AC_ARG_WITH([cart], + [AS_HELP_STRING([--with-cart], + [support IO with DAOS backends @<:@default=no@:>@])], + [], [with_daos=no]) + +AS_IF([test "x$with_cart" != xno], [ + CART="yes" + LDFLAGS="$LDFLAGS -L$with_cart/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_cart/lib64" + LDFLAGS="$LDFLAGS -L$with_cart/lib -Wl,--enable-new-dtags -Wl,-rpath=$with_cart/lib" + CPPFLAGS="$CPPFLAGS -I$with_cart/include/" + AC_CHECK_HEADERS(gurt/common.h,, [unset CART]) + AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset CART]) +]) + +AC_ARG_WITH([daos], + [AS_HELP_STRING([--with-daos], + [support IO with DAOS backends @<:@default=no@:>@])], + [], [with_daos=no]) + +AS_IF([test "x$with_daos" != xno], [ + DAOS="yes" + LDFLAGS="$LDFLAGS -L$with_daos/lib64 
-Wl,--enable-new-dtags -Wl,-rpath=$with_daos/lib64" + CPPFLAGS="$CPPFLAGS -I$with_daos/include" + AC_CHECK_HEADERS(daos_types.h,, [unset DAOS]) + AC_CHECK_LIB([uuid], [uuid_generate],, [unset DAOS]) + AC_CHECK_LIB([daos_common], [daos_sgl_init],, [unset DAOS]) + AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) + AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS]) +]) + +AM_CONDITIONAL([USE_DAOS_AIORI], [test x$DAOS = xyes]) +AM_COND_IF([USE_DAOS_AIORI],[ + AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backends AIORI]) +]) + +# Gfarm support +AC_MSG_CHECKING([for Gfarm file system]) +AC_ARG_WITH([gfarm], + [AS_HELP_STRING([--with-gfarm=GFARM_ROOT], + [support IO with Gfarm backend @<:@default=no@:>@])], + [], [with_gfarm=no]) +AC_MSG_RESULT([$with_gfarm]) +AM_CONDITIONAL([USE_GFARM_AIORI], [test x$with_gfarm != xno]) +if test x$with_gfarm != xno; then + AC_DEFINE([USE_GFARM_AIORI], [], [Build Gfarm backend AIORI]) + case x$with_gfarm in + xyes) ;; + *) + CPPFLAGS="$CPPFLAGS -I$with_gfarm/include" + LDFLAGS="$LDFLAGS -L$with_gfarm/lib" ;; + esac + AC_CHECK_LIB([gfarm], [gfarm_initialize],, [AC_MSG_ERROR([libgfarm not found])]) + AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec]) +fi # aws4c is needed for the S3 backend (see --with-S3, below). # Version 0.5.2 of aws4c is available at https://github.com/jti-lanl/aws4c.git @@ -245,12 +338,6 @@ Consider --with-aws4c=, CPPFLAGS, LDFLAGS, etc]) ]) - - - - - - # Enable building "IOR", in all capitals AC_ARG_ENABLE([caps], [AS_HELP_STRING([--enable-caps], @@ -261,6 +348,7 @@ AM_CONDITIONAL([USE_CAPS], [test x$enable_caps = xyes]) AC_CONFIG_FILES([Makefile src/Makefile + src/test/Makefile contrib/Makefile doc/Makefile]) AC_OUTPUT diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index 7ea6e49a..3d6b4e4f 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -65,7 +65,6 @@ These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'. 
-a S api -- API for I/O, e.g., POSIX -A N refNum -- user reference number to include in long summary -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g) - -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers -c collective -- collective I/O -C reorderTasksConstant -- changes task ordering to n+1 ordering for readback -d N interTestDelay -- delay between reps in seconds @@ -118,6 +117,8 @@ NOTES: * S is a string, N is an integer number. * For transfer and block sizes, the case-insensitive K, M, and G suffices are recognized. I.e., '4k' or '4K' is accepted as 4096. +Various options are only valid for specific modules, you can see details when running $ ./ior -h +These options are typically prefixed with the module name, an example is: --posix.odirect ********************* * 4. OPTION DETAILS * @@ -170,6 +171,8 @@ GENERAL: NOTE: it does not delay before a check write or check read + * interIODelay - this time in us (microseconds) after each I/O simulates computing time. + * outlierThreshold - gives warning if any task is more than this number of seconds from the mean of all participating tasks. If so, the task is identified, its time (start, @@ -340,7 +343,7 @@ MPIIO-, HDF5-, AND NCMPI-ONLY: * collective - uses collective operations for access [0=FALSE] * showHints - show hint/value pairs attached to open file [0=FALSE] - NOTE: not available in NCMPI + NOTE: available in NCMPI only if PnetCDF is 1.2.0 or later LUSTRE-SPECIFIC: ================ @@ -548,6 +551,17 @@ HOW DOES IOR CALCULATE PERFORMANCE? operations (-g), the sum of the open, transfer, and close times may not equal the elapsed time from the first open to the last close. + After each iteration (-i) IOR reports performance for that iteration, and + those numbers include: + + - Bandwidth (described above) + + - IOPS: I/O rate (operations per second) achieved by all tasks given the total + time spent in reading and writing the data. 
+ + - Latency: computed by taking the average latency of all I/O operations from a + single task. If ior is run with multiple tasks, then the latency reported is + the minimum that was computed between all tasks. HOW DO I ACCESS MULTIPLE FILE SYSTEMS IN IOR? diff --git a/doc/mdtest.1 b/doc/mdtest.1 index ba82d88a..3cfc082f 100644 --- a/doc/mdtest.1 +++ b/doc/mdtest.1 @@ -47,6 +47,9 @@ The first number of tasks on which the test will run .I "-F" Perform test on files only (no directories). .TP +.I "-g" +Use global default directory layout for test subdirectories (deletes inherited striping layout - Lustre only). +.TP .I "-h" Display help message. .TP diff --git a/doc/sphinx/README.md b/doc/sphinx/README.md new file mode 100644 index 00000000..2ec8323a --- /dev/null +++ b/doc/sphinx/README.md @@ -0,0 +1,5 @@ +To build the documentation in this directory, + + sphinx-build . _build_html + +The output will be saved as html in `_build_html/`. diff --git a/doc/sphinx/changes.rst b/doc/sphinx/changes.rst index 3632a7d8..c11d45e1 100644 --- a/doc/sphinx/changes.rst +++ b/doc/sphinx/changes.rst @@ -1,4 +1,4 @@ Changes in IOR -***************** +************** -.. .. include:: ../../ChangeLog +.. include:: ../../NEWS diff --git a/doc/sphinx/conf.py b/doc/sphinx/conf.py index 05e58074..688303ec 100644 --- a/doc/sphinx/conf.py +++ b/doc/sphinx/conf.py @@ -64,18 +64,30 @@ master_doc = 'index' # General information about the project. -project = u'IOR' -copyright = u'2017, IOR' -author = u'IOR' +project = u'ior' +copyright = u'2018, The Regents of the University of California' +author = u'The IOR Developer Community' + # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = u'3.1.0' +version = u'3.2.0' # The full version, including alpha/beta/rc tags. 
-release = u'0' +release = u'3.2.0' + +for line in open(os.path.join('..', '..', 'META'), 'r'): + key, value = line.split(None, 1) + if key == "Package:": + project = value + elif key == "Version:": + version = value + release = value +# We don't use the release field +# elif key == "Release:": +# release = value # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -185,7 +197,7 @@ # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'IOR', u'IOR Documentation', - author, 'IOR', 'One line description of project.', + author, 'IOR', 'IOR/mdtest Parallel I/O Benchmarks', 'Miscellaneous'), ] diff --git a/doc/sphinx/devDoc/CI.rst b/doc/sphinx/devDoc/CI.rst index 39cbf9e8..43f1b5c3 100644 --- a/doc/sphinx/devDoc/CI.rst +++ b/doc/sphinx/devDoc/CI.rst @@ -1,8 +1,8 @@ -Continues Integration -===================== +Continuous Integration +====================== -Continues Integration is used for basic sanity checking. Travis-CI provides free -CI for open source github projects and is configured via a .travis.yml. +Continuous Integration is used for basic sanity checking. Travis-CI provides free +CI for open source github projects and is configured via a ``.travis.yml``. For now this is set up to compile IOR on a ubuntu 14.04 machine with gcc 4.8, openmpi and hdf5 for the backends. This is a pretty basic check and should be diff --git a/doc/sphinx/devDoc/release.rst b/doc/sphinx/devDoc/release.rst new file mode 100644 index 00000000..1e39a35e --- /dev/null +++ b/doc/sphinx/devDoc/release.rst @@ -0,0 +1,38 @@ +Release Process +=============== + +To build a new version of IOR:: + + $ docker run -it ubuntu bash + $ apt-get update + $ apt-get install -y git automake autoconf make gcc mpich + $ git clone -b rc https://github.com/hpc/ior + $ cd ior + $ ./travis-build.sh + +To create a new release candidate from RC, + +1. 
Disable the ``check-news`` option in ``AM_INIT_AUTOMAKE`` inside configure.ac +2. Append "rcX" to the ``Version:`` field in META where X is the release + candidate number +3. Build a release package as described above + +To create a new minor release of IOR, + +1. Build the rc branch as described above +2. Create a release on GitHub which creates the appropriate tag +3. Upload the source distributions generated by travis-build.sh + +To create a micro branch of IOR (e.g., if a release needs a hotfix), + +1. Check out the relevant release tagged in the rc branch (e.g., ``3.2.0``) +2. Create a branch with the major.minor name (e.g., ``3.2``) from that tag +3. Update the ``Version:`` in META +4. Apply hotfix(es) to that major.minor branch +5. Create the major.minor.micro release on GitHub + +To initiate a feature freeze, + +1. Merge the master branch into the rc branch +2. Update the ``Version:`` field in META `of the master branch` to be the `next` + release version, not the one whose features have just been frozen diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst index de693ac5..fe3abb78 100644 --- a/doc/sphinx/index.rst +++ b/doc/sphinx/index.rst @@ -13,7 +13,7 @@ userDoc/install First Steps userDoc/options - userDoc/skripts + userDoc/scripts userDoc/compatibility FAQ @@ -24,6 +24,7 @@ devDoc/doxygen devDoc/CI + devDoc/release .. toctree:: :hidden: diff --git a/doc/sphinx/intro.rst b/doc/sphinx/intro.rst index 3964bbc3..11cf138f 100644 --- a/doc/sphinx/intro.rst +++ b/doc/sphinx/intro.rst @@ -1,26 +1,22 @@ Introduction ============ -Welcome to the IOR documentation. +IOR is a parallel IO benchmark that can be used to test the performance of +parallel storage systems using various interfaces and access patterns. The +IOR repository also includes the mdtest benchmark which specifically tests +the peak metadata rates of storage systems under different directory +structures. 
Both benchmarks use a common parallel I/O abstraction backend +and rely on MPI for synchronization. -**I**\ nterleaved **o**\ r **R**\ andom is a parallel IO benchmark. -IOR can be used for testing performance of parallel file systems using various -interfaces and access patterns. IOR uses MPI for process synchronization. -This documentation provides information for versions 3 and higher, for other -versions check :ref:`compatibility` +This documentation consists of two parts. -This documentation consists of tow parts. +**User documentation** includes installation instructions (:ref:`install`), a +beginner's tutorial (:ref:`first-steps`), and information about IOR's +runtime :ref:`options`. -The first part is a user documentation were you find instructions on compilation, a -beginners tutorial (:ref:`first-steps`) as well as information about all -available :ref:`options`. +**Developer documentation** consists of code documentation generated with +Doxygen and some notes about the continuous integration with Travis. -The second part is the developer documentation. It currently only consists of a -auto generated Doxygen and some notes about the contiguous integration with travis. -As there are quite some people how needs to modify or extend IOR to there needs -it would be great to have documentation on what and how to alter IOR without -breaking other stuff. Currently there is neither a documentation on the overall -concept of the code nor on implementation details. If you are getting your -hands dirty in code anyways or have deeper understanding of IOR, you are more -then welcome to comment the code directly, which will result in better Doxygen -output or add your insight to this sphinx documentation. +Many aspects of both IOR/mdtest user and developer documentation are incomplete, +and contributors are encouraged to comment the code directly or expand upon this +documentation. 
diff --git a/doc/sphinx/userDoc/compatibility.rst b/doc/sphinx/userDoc/compatibility.rst index 6b87d680..0c925e2a 100644 --- a/doc/sphinx/userDoc/compatibility.rst +++ b/doc/sphinx/userDoc/compatibility.rst @@ -1,10 +1,11 @@ .. _compatibility: -Compatibility -============= +Version Compatibility +===================== -IOR has a long history. Here are some hints about compatibility with older -versions. +IOR has a long history and only IOR version 3 is currently supported. However, +there are many forks of IOR based on earlier versions, and the following +incompatibilities are known to exist between major versions. 1) IOR version 1 (c. 1996-2002) and IOR version 2 (c. 2003-present) are incompatible. Input decks from one will not work on the other. As version diff --git a/doc/sphinx/userDoc/install.rst b/doc/sphinx/userDoc/install.rst index 48948ac0..4bfa684b 100644 --- a/doc/sphinx/userDoc/install.rst +++ b/doc/sphinx/userDoc/install.rst @@ -1,3 +1,5 @@ +.. _install: + Install ======= diff --git a/doc/sphinx/userDoc/options.rst b/doc/sphinx/userDoc/options.rst index 8b441269..31240f0e 100644 --- a/doc/sphinx/userDoc/options.rst +++ b/doc/sphinx/userDoc/options.rst @@ -16,11 +16,12 @@ normal parameters override each other, so the last one executed. Command line options -------------------- -These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'. + +These options are to be used on the command line (e.g., ``./ior -a POSIX -b 4K``). + -a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS] -A N refNum -- user reference number to include in long summary -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g) - -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers -c collective -- collective I/O -C reorderTasksConstant -- changes task ordering to n+1 ordering for readback -d N interTestDelay -- delay between reps in seconds @@ -69,283 +70,267 @@ These options are to be used on the command line. 
E.g., 'IOR -a POSIX -b 4K'. -Z reorderTasksRandom -- changes task ordering to random ordering for readback -NOTES: * S is a string, N is an integer number. - * For transfer and block sizes, the case-insensitive K, M, and G - suffices are recognized. I.e., '4k' or '4K' is accepted as 4096. +* S is a string, N is an integer number. + +* For transfer and block sizes, the case-insensitive K, M, and G + suffices are recognized. I.e., '4k' or '4K' is accepted as 4096. + +Various options are only valid for specific modules, you can see details when running $ ./ior -h +These options are typically prefixed with the module name, an example is: --posix.odirect Directive Options ------------------ -For each of the general settings, note the default is shown in brackets. -IMPORTANT NOTE: For all true/false options below [1]=true, [0]=false -IMPORTANT NOTE: Contrary to appearance, the script options below are NOT case sensitive +For all true/false options below, [1]=true, [0]=false. All options are case-insensitive. -GENERAL: +GENERAL ^^^^^^^^^^^^^^ - * refNum - user supplied reference number, included in - long summary [0] - * api - must be set to one of POSIX, MPIIO, HDF5, HDFS, S3, - S3_EMC, or NCMPI, depending on test [POSIX] + * ``refNum`` - user supplied reference number, included in long summary + (default: 0) + + * ``api`` - must be set to one of POSIX, MPIIO, HDF5, HDFS, S3, S3_EMC, NCMPI, + IME, MMAP, or RADOS depending on test (default: ``POSIX``) + + * ``testFile`` - name of the output file [testFile]. With ``filePerProc`` set, + the tasks can round robin across multiple file names via ``-o S@S@S``. 
+ If only a single file name is specified in this case, IOR appends the MPI + rank to the end of each file generated (e.g., ``testFile.00000059``) + (default: ``testFile``) + + * ``hintsFileName`` - name of the hints file (default: none) + + * ``repetitions`` - number of times to run each test (default: 1) + + * ``multiFile`` - creates multiple files for single-shared-file or + file-per-process modes for each iteration (default: 0) + + * ``reorderTasksConstant`` - reorders tasks by a constant node offset for + writing/reading neighbor's data from different nodes (default: 0) + + * ``taskPerNodeOffset`` - for read tests. Use with ``-C`` and ``-Z`` options. + With ``reorderTasks``, constant N. With ``reordertasksrandom``, >= N + (default: 1) + + * ``reorderTasksRandom`` - reorders tasks to random ordering for read tests + (default: 0) + + * ``reorderTasksRandomSeed`` - random seed for ``reordertasksrandom`` option. (default: 0) + * When > 0, use the same seed for all iterations + * When < 0, different seed for each iteration + + * ``quitOnError`` - upon error encountered on ``checkWrite`` or ``checkRead``, + display current error and then stop execution. Otherwise, count errors and + continue (default: 0) + + * ``numTasks`` - number of tasks that should participate in the test. 0 + denotes all tasks. (default: 0) + + * ``interTestDelay`` - time (in seconds) to delay before beginning a write or + read phase in a series of tests This does not delay before check-write or + check-read phases. (default: 0) + + * ``outlierThreshold`` - gives warning if any task is more than this number of + seconds from the mean of all participating tasks. The warning includes the + offending task, its timers (start, elapsed create, elapsed transfer, elapsed + close, end), and the mean and standard deviation for all tasks. When zero, + disable this feature. 
(default: 0) + + * ``intraTestBarriers`` - use barrier between open, write/read, and close + phases (default: 0) + + * ``uniqueDir`` - create and use unique directory for each file-per-process + (default: 0) + + * ``writeFile`` - write file(s), first deleting any existing file. + The defaults for ``writeFile`` and ``readFile`` are set such that if there + is not at least one of ``-w``, ``-r``, ``-W``, or ``-R``, ``-w`` and ``-r`` + are enabled. If either ``writeFile`` or ``readFile`` are explicitly + enabled, though, its complement is *not* also implicitly enabled. - * testFile - name of the output file [testFile] - NOTE: with filePerProc set, the tasks can round - robin across multiple file names '-o S@S@S' + * ``readFile`` - reads existing file(s) as specified by the ``testFile`` + option. The defaults for ``writeFile`` and ``readFile`` are set such that + if there is not at least one of ``-w``, ``-r``, ``-W``, or ``-R``, ``-w`` + and ``-r`` are enabled. If either ``writeFile`` or ``readFile`` are + explicitly enabled, though, its complement is *not* also implicitly enabled. - * hintsFileName - name of the hints file [] + * ``filePerProc`` - have each MPI process perform I/O to a unique file + (default: 0) - * repetitions - number of times to run each test [1] + * ``checkWrite`` - read data back and check for errors against known pattern. + Can be used independently of ``writeFile``. Data checking is not timed and + does not affect other performance timings. All errors detected are tallied + and returned as the program exit code unless ``quitOnError`` is set. + (default: 0) - * multiFile - creates multiple files for single-shared-file or - file-per-process modes; i.e. each iteration creates - a new file [0=FALSE] - - * reorderTasksConstant - reorders tasks by a constant node offset for writing/reading neighbor's - data from different nodes [0=FALSE] + * ``checkRead`` - re-read data and check for errors between reads. Can be + used independently of ``readFile``. 
Data checking is not timed and does not + affect other performance timings. All errors detected are tallied and + returned as the program exit code unless ``quitOnError`` is set. + (default: 0) - * taskPerNodeOffset - for read tests. Use with -C & -Z options. [1] - With reorderTasks, constant N. With reordertasksrandom, >= N - - * reorderTasksRandom - reorders tasks to random ordering for readback [0=FALSE] + * ``keepFile`` - do not remove test file(s) on program exit (default: 0) - * reorderTasksRandomSeed - random seed for reordertasksrandom option. [0] - >0, same seed for all iterations. <0, different seed for each iteration - - * quitOnError - upon error encountered on checkWrite or checkRead, - display current error and then stop execution; - if not set, count errors and continue [0=FALSE] - - * numTasks - number of tasks that should participate in the test - [0] - NOTE: 0 denotes all tasks - - * interTestDelay - this is the time in seconds to delay before - beginning a write or read in a series of tests [0] - NOTE: it does not delay before a check write or - check read - - * outlierThreshold - gives warning if any task is more than this number - of seconds from the mean of all participating tasks. - If so, the task is identified, its time (start, - elapsed create, elapsed transfer, elapsed close, or - end) is reported, as is the mean and standard - deviation for all tasks. The default for this is 0, - which turns it off. If set to a positive value, for - example 3, any task not within 3 seconds of the mean - displays its times. 
[0] - - * intraTestBarriers - use barrier between open, write/read, and close [0=FALSE] - - * uniqueDir - create and use unique directory for each - file-per-process [0=FALSE] - - * writeFile - writes file(s), first deleting any existing file [1=TRUE] - NOTE: the defaults for writeFile and readFile are - set such that if there is not at least one of - the following -w, -r, -W, or -R, it is assumed - that -w and -r are expected and are - consequently used -- this is only true with - the command line, and may be overridden in - a script - - * readFile - reads existing file(s) (from current or previous - run) [1=TRUE] - NOTE: see writeFile notes - - * filePerProc - accesses a single file for each processor; default - is a single file accessed by all processors [0=FALSE] - - * checkWrite - read data back and check for errors against known - pattern; can be used independently of writeFile [0=FALSE] - NOTES: - data checking is not timed and does not - affect other performance timings - - all errors tallied and returned as program - exit code, unless quitOnError set - - * checkRead - reread data and check for errors between reads; can - be used independently of readFile [0=FALSE] - NOTE: see checkWrite notes - - * keepFile - stops removal of test file(s) on program exit [0=FALSE] - - * keepFileWithError - ensures that with any error found in data-checking, - the error-filled file(s) will not be deleted [0=FALSE] - - * useExistingTestFile - do not remove test file before write access [0=FALSE] - - * segmentCount - number of segments in file [1] - NOTES: - a segment is a contiguous chunk of data - accessed by multiple clients each writing/ - reading their own contiguous data; - comprised of blocks accessed by multiple - clients - - with HDF5 this repeats the pattern of an - entire shared dataset - - * blockSize - size (in bytes) of a contiguous chunk of data - accessed by a single client; it is comprised of one - or more transfers [1048576] - - * transferSize - size (in 
bytes) of a single data buffer to be - transferred in a single I/O call [262144] - - * verbose - output information [0] - NOTE: this can be set to levels 0-5 on the command - line; repeating the -v flag will increase - verbosity level - - * setTimeStampSignature - set value for time stamp signature [0] - NOTE: used to rerun tests with the exact data - pattern by setting data signature to contain - positive integer value as timestamp to be - written in data file; if set to 0, is - disabled - - * showHelp - display options and help [0=FALSE] - - * storeFileOffset - use file offset as stored signature when writing - file [0=FALSE] - NOTE: this will affect performance measurements - - * memoryPerNode - Allocate memory on each node to simulate real - application memory usage. Accepts a percentage of - node memory (e.g. "50%") on machines that support - sysconf(_SC_PHYS_PAGES) or a size. Allocation will - be split between tasks that share the node. - - * memoryPerTask - Allocate secified amount of memory per task to - simulate real application memory usage. - - * maxTimeDuration - max time in minutes to run tests [0] - NOTES: * setting this to zero (0) unsets this option - * this option allows the current read/write - to complete without interruption - - * deadlineForStonewalling - seconds before stopping write or read phase [0] - NOTES: - used for measuring the amount of data moved - in a fixed time. After the barrier, each - task starts its own timer, begins moving - data, and the stops moving data at a pre- - arranged time. Instead of measuring the - amount of time to move a fixed amount of - data, this option measures the amount of - data moved in a fixed amount of time. The - objective is to prevent tasks slow to - complete from skewing the performance. 
- - setting this to zero (0) unsets this option - - this option is incompatible w/data checking - - * randomOffset - access is to random, not sequential, offsets within a file [0=FALSE] - NOTES: - this option is currently incompatible with: - -checkRead - -storeFileOffset - -MPIIO collective or useFileView - -HDF5 or NCMPI - * summaryAlways - Always print the long summary for each test. - Useful for long runs that may be interrupted, preventing - the final long summary for ALL tests to be printed. + * ``keepFileWithError`` - do not delete any files containing errors if + detected during read-check or write-check phases. (default: 0) + * ``useExistingTestFile`` - do not remove test file(s) before write phase + (default: 0) + + * ``segmentCount`` - number of segments in file, where a segment is a + contiguous chunk of data accessed by multiple clients each writing/reading + their own contiguous data (blocks). The exact semantics of segments + depend on the API used; for example, HDF5 repeats the pattern of an entire + shared dataset. (default: 1) + + * ``blockSize`` - size (in bytes) of a contiguous chunk of data accessed by a + single client. It is comprised of one or more transfers (default: 1048576) + + * ``transferSize`` - size (in bytes) of a single data buffer to be transferred + in a single I/O call (default: 262144) + + * ``verbose`` - output more information about what IOR is doing. Can be set + to levels 0-5; repeating the -v flag will increase verbosity level. + (default: 0) + + * ``setTimeStampSignature`` - Value to use for the time stamp signature. Used + to rerun tests with the exact data pattern by setting data signature to + contain positive integer value as timestamp to be written in data file; if + set to 0, is disabled (default: 0) + + * ``showHelp`` - display options and help (default: 0) + + * ``storeFileOffset`` - use file offset as stored signature when writing file. 
+ This will affect performance measurements (default: 0) + + * ``memoryPerNode`` - allocate memory on each node to simulate real + application memory usage or restrict page cache size. Accepts a percentage + of node memory (e.g. ``50%``) on systems that support + ``sysconf(_SC_PHYS_PAGES)`` or a size. Allocation will be split between + tasks that share the node. (default: 0) + + * ``memoryPerTask`` - allocate specified amount of memory (in bytes) per task + to simulate real application memory usage. (default: 0) + + * ``maxTimeDuration`` - max time (in minutes) to run all tests. Any current + read/write phase is not interrupted; only future I/O phases are cancelled + once this time is exceeded. Value of zero unsets disables. (default: 0) + + * ``deadlineForStonewalling`` - seconds before stopping write or read phase. + Used for measuring the amount of data moved in a fixed time. After the + barrier, each task starts its own timer, begins moving data, and the stops + moving data at a pre-arranged time. Instead of measuring the amount of time + to move a fixed amount of data, this option measures the amount of data + moved in a fixed amount of time. The objective is to prevent straggling + tasks slow from skewing the performance. This option is incompatible with + read-check and write-check modes. Value of zero unsets this option. + (default: 0) + + * ``randomOffset`` - randomize access offsets within test file(s). Currently + incompatible with ``checkRead``, ``storeFileOffset``, MPIIO ``collective`` + and ``useFileView``, and HDF5 and NCMPI APIs. (default: 0) + + * ``summaryAlways`` - Always print the long summary for each test even if the job is interrupted. 
(default: 0) POSIX-ONLY ^^^^^^^^^^ - * useO_DIRECT - use O_DIRECT for POSIX, bypassing I/O buffers [0] - * singleXferAttempt - will not continue to retry transfer entire buffer - until it is transferred [0=FALSE] - NOTE: when performing a write() or read() in POSIX, - there is no guarantee that the entire - requested size of the buffer will be - transferred; this flag keeps the retrying a - single transfer until it completes or returns - an error + * ``useO_DIRECT`` - use direct I/ for POSIX, bypassing I/O buffers (default: 0) + + * ``singleXferAttempt`` - do not continue to retry transfer entire buffer + until it is transferred. When performing a write() or read() in POSIX, + there is no guarantee that the entire requested size of the buffer will be + transferred; this flag keeps the retrying a single transfer until it + completes or returns an error (default: 0) + + * ``fsyncPerWrite`` - perform fsync after each POSIX write (default: 0) - * fsyncPerWrite - perform fsync after each POSIX write [0=FALSE] - * fsync - perform fsync after POSIX write close [0=FALSE] + * ``fsync`` - perform fsync after POSIX file close (default: 0) MPIIO-ONLY ^^^^^^^^^^ - * preallocate - preallocate the entire file before writing [0=FALSE] - * useFileView - use an MPI datatype for setting the file view option - to use individual file pointer [0=FALSE] - NOTE: default IOR uses explicit file pointers + * ``preallocate`` - preallocate the entire file before writing (default: 0) - * useSharedFilePointer - use a shared file pointer [0=FALSE] (not working) - NOTE: default IOR uses explicit file pointers + * ``useFileView`` - use an MPI datatype for setting the file view option to + use individual file pointer. Default IOR uses explicit file pointers. + (default: 0) - * useStridedDatatype - create a datatype (max=2GB) for strided access; akin - to MULTIBLOCK_REGION_SIZE [0] (not working) + * ``useSharedFilePointer`` - use a shared file pointer. Default IOR uses + explicit file pointers. 
(default: 0) + + * ``useStridedDatatype`` - create a datatype (max=2GB) for strided access; + akin to ``MULTIBLOCK_REGION_SIZE`` (default: 0) HDF5-ONLY ^^^^^^^^^ - * individualDataSets - within a single file each task will access its own - dataset [0=FALSE] (not working) - NOTE: default IOR creates a dataset the size of - numTasks * blockSize to be accessed by all - tasks - * noFill - no pre-filling of data in HDF5 file creation [0=FALSE] + * ``individualDataSets`` - within a single file, each task will access its own + dataset. Default IOR creates a dataset the size of ``numTasks * blockSize`` + to be accessed by all tasks (default: 0) + + * ``noFill`` - do not pre-fill data in HDF5 file creation (default: 0) - * setAlignment - HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g) [1] + * ``setAlignment`` - set the HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g) (default: 1) - * collectiveMetadata - enable HDF5 collective metadata (available since - HDF5-1.10.0) + * hdf5.collectiveMetadata - enable HDF5 collective metadata (available since HDF5-1.10.0) MPIIO-, HDF5-, AND NCMPI-ONLY ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - * collective - uses collective operations for access [0=FALSE] - * showHints - show hint/value pairs attached to open file [0=FALSE] - NOTE: not available in NCMPI + * ``collective`` - uses collective operations for access (default: 0) + + * ``showHints`` - show hint/value pairs attached to open file. Not available + for NCMPI. 
(default: 0) LUSTRE-SPECIFIC ^^^^^^^^^^^^^^^^^ - * lustreStripeCount - set the lustre stripe count for the test file(s) [0] - * lustreStripeSize - set the lustre stripe size for the test file(s) [0] + * ``lustreStripeCount`` - set the Lustre stripe count for the test file(s) (default: 0) + + * ``lustreStripeSize`` - set the Lustre stripe size for the test file(s) (default: 0) - * lustreStartOST - set the starting OST for the test file(s) [-1] + * ``lustreStartOST`` - set the starting OST for the test file(s) (default: -1) - * lustreIgnoreLocks - disable lustre range locking [0] + * ``lustreIgnoreLocks`` - disable Lustre range locking (default: 0) GPFS-SPECIFIC ^^^^^^^^^^^^^^ - * gpfsHintAccess - use gpfs_fcntl hints to pre-declare accesses - - * gpfsReleaseToken - immediately after opening or creating file, release - all locks. Might help mitigate lock-revocation - traffic when many proceses write/read to same file. + * ``gpfsHintAccess`` - use ``gpfs_fcntl`` hints to pre-declare accesses (default: 0) + * ``gpfsReleaseToken`` - release all locks immediately after opening or + creating file. Might help mitigate lock-revocation traffic when many + proceses write/read to same file. (default: 0) Verbosity levels ---------------------- -The verbosity of output for IOR can be set with -v. Increasing the number of --v instances on a command line sets the verbosity higher. +---------------- + +The verbosity of output for IOR can be set with ``-v``. Increasing the number +of ``-v`` instances on a command line sets the verbosity higher. 
Here is an overview of the information shown for different verbosity levels: -0) default; only bare essentials shown -1) max clock deviation, participating tasks, free space, access pattern, - commence/verify access notification w/time -2) rank/hostname, machine name, timer used, individual repetition - performance results, timestamp used for data signature -3) full test details, transfer block/offset compared, individual data - checking errors, environment variables, task writing/reading file name, - all test operation times -4) task id and offset for each transfer -5) each 8-byte data signature comparison (WARNING: more data to STDOUT - than stored in file, use carefully) +====== =================================== +Level Behavior +====== =================================== + 0 default; only bare essentials shown + 1 max clock deviation, participating tasks, free space, access pattern, commence/verify access notification with time + 2 rank/hostname, machine name, timer used, individual repetition performance results, timestamp used for data signature + 3 full test details, transfer block/offset compared, individual data checking errors, environment variables, task writing/reading file name, all test operation times + 4 task id and offset for each transfer + 5 each 8-byte data signature comparison (WARNING: more data to STDOUT than stored in file, use carefully) +====== =================================== Incompressible notes -------------------------- +-------------------- Please note that incompressibility is a factor of how large a block compression -algorithm uses. The incompressible buffer is filled only once before write times, -so if the compression algorithm takes in blocks larger than the transfer size, -there will be compression. Below are some baselines that I established for -zip, gzip, and bzip. +algorithm uses. 
The incompressible buffer is filled only once before write +times, so if the compression algorithm takes in blocks larger than the transfer +size, there will be compression. Below are some baselines for zip, gzip, and +bzip. 1) zip: For zipped files, a transfer size of 1k is sufficient. @@ -355,5 +340,5 @@ zip, gzip, and bzip. To avoid compression a transfer size of greater than the bzip block size is required (default = 900KB). I suggest a transfer size of greather than 1MB to avoid bzip2 compression. -Be aware of the block size your compression algorithm will look at, and adjust the transfer size -accordingly. +Be aware of the block size your compression algorithm will look at, and adjust +the transfer size accordingly. diff --git a/doc/sphinx/userDoc/scripts.rst b/doc/sphinx/userDoc/scripts.rst new file mode 100644 index 00000000..14e00e05 --- /dev/null +++ b/doc/sphinx/userDoc/scripts.rst @@ -0,0 +1,82 @@ +Scripting +========= + +IOR can use an input script with the command line using the ``-f`` option. +**Any options on the command line set before the '-f' option is given will be +considered the default settings for running the script.** For example, :: + + mpirun ./ior -W -f script + +will run all tests in the script with an implicit ``-W``. The script itself can +override these settings and may be set to run many different tests of IOR under +a single execution, and it is important to note that **any command-line options +specified after ``-f`` will not be applied to the runs dictated by the script.** +For example, :: + + mpirun ./ior -f script -W + +will *not* run any tests with the implicit ``-W`` since that argument does not +get applied until after the ``-f`` option (and its constituent runs) are complete. + +Input scripts are specified using the long-form option names that correspond to +each command-line option. 
In addition to long-form options, + + * ``IOR START`` and ``IOR END`` mark the beginning and end of the script + * ``RUN`` dispatches the test using all of the options specified before it + * All previous set parameter stay set for the next test. They are not reset + to the default! For default the must be rest manually. + * White space is ignored in script, as are comments starting with ``#``. + * Not all test parameters need be set. + +An example of a script: :: + + IOR START + api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS] + testFile=testFile + hintsFileName=hintsFile + repetitions=8 + multiFile=0 + interTestDelay=5 + readFile=1 + writeFile=1 + filePerProc=0 + checkWrite=0 + checkRead=0 + keepFile=1 + quitOnError=0 + segmentCount=1 + blockSize=32k + outlierThreshold=0 + setAlignment=1 + transferSize=32 + singleXferAttempt=0 + individualDataSets=0 + verbose=0 + numTasks=32 + collective=1 + preallocate=0 + useFileView=0 + keepFileWithError=0 + setTimeStampSignature=0 + useSharedFilePointer=0 + useStridedDatatype=0 + uniqueDir=0 + fsync=0 + storeFileOffset=0 + maxTimeDuration=60 + deadlineForStonewalling=0 + useExistingTestFile=0 + useO_DIRECT=0 + showHints=0 + showHelp=0 + RUN + # additional tests are optional + transferSize=64 + blockSize=64k + segmentcount=2 + RUN + transferSize=4K + blockSize=1M + segmentcount=1024 + RUN + IOR STOP diff --git a/doc/sphinx/userDoc/skripts.rst b/doc/sphinx/userDoc/skripts.rst deleted file mode 100644 index 964f1acc..00000000 --- a/doc/sphinx/userDoc/skripts.rst +++ /dev/null @@ -1,72 +0,0 @@ -Scripting -========= - -IOR can use a script with the command line. Any options on the command line set -before the script will be considered the default settings for running the script. -(I.e.,'$ ./IOR -W -f script' will have all tests in the script run with the -W -option as default.) -The script itself can override these settings and may be set to run -run many different tests of IOR under a single execution. 
-The command line is: :: - - ./IOR -f script - -In IOR/scripts, there are scripts of test cases for simulating I/O behavior of -various application codes. Details are included in each script as necessary. - -Syntax: - * IOR START / IOR END: marks the beginning and end of the script - * RUN: Delimiter for next Test - * All previous set parameter stay set for the next test. They are not reset - to the default! For default the musst be rest manually. - * White space is ignored in script, as are comments starting with '#'. - * Not all test parameters need be set. - -An example of a script: :: - - IOR START - api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS] - testFile=testFile - hintsFileName=hintsFile - repetitions=8 - multiFile=0 - interTestDelay=5 - readFile=1 - writeFile=1 - filePerProc=0 - checkWrite=0 - checkRead=0 - keepFile=1 - quitOnError=0 - segmentCount=1 - blockSize=32k - outlierThreshold=0 - setAlignment=1 - transferSize=32 - singleXferAttempt=0 - individualDataSets=0 - verbose=0 - numTasks=32 - collective=1 - preallocate=0 - useFileView=0 - keepFileWithError=0 - setTimeStampSignature=0 - useSharedFilePointer=0 - useStridedDatatype=0 - uniqueDir=0 - fsync=0 - storeFileOffset=0 - maxTimeDuration=60 - deadlineForStonewalling=0 - useExistingTestFile=0 - useO_DIRECT=0 - showHints=0 - showHelp=0 - RUN - # additional tests are optional - - RUN - - RUN - IOR STOP diff --git a/doc/sphinx/userDoc/tutorial.rst b/doc/sphinx/userDoc/tutorial.rst index 9556cebe..5fa68141 100644 --- a/doc/sphinx/userDoc/tutorial.rst +++ b/doc/sphinx/userDoc/tutorial.rst @@ -181,6 +181,7 @@ again, using this option changes our performance measurement quite a bit:: and we finally have a believable bandwidth measurement for our file system. 
Defeating Page Cache +-------------------- Since IOR is specifically designed to benchmark I/O, it provides these options that make it as easy as possible to ensure that you are actually measuring the performance of your file system and not your compute nodes' memory. That being diff --git a/src/Makefile.am b/src/Makefile.am index 51fb8734..3786560f 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = . +SUBDIRS = . test bin_PROGRAMS = ior mdtest if USE_CAPS @@ -70,6 +70,21 @@ extraSOURCES += aiori-RADOS.c extraLDADD += -lrados endif +if USE_CEPHFS_AIORI +extraSOURCES += aiori-CEPHFS.c +extraLDADD += -lcephfs +endif + + +if USE_DAOS_AIORI +extraSOURCES += aiori-DAOS.c aiori-DFS.c +endif + +if USE_GFARM_AIORI +extraSOURCES += aiori-Gfarm.c +extraLDADD += -lgfarm +endif + if USE_S3_AIORI extraSOURCES += aiori-S3.c if AWS4C_DIR @@ -82,6 +97,10 @@ extraLDADD += -laws4c extraLDADD += -laws4c_extra endif +if WITH_LUSTRE +extraLDADD += -llustreapi +endif + ior_SOURCES += $(extraSOURCES) ior_LDFLAGS += $(extraLDFLAGS) ior_LDADD += $(extraLDADD) @@ -104,11 +123,3 @@ MDTEST_CPPFLAGS = $(mdtest_CPPFLAGS) libaiori_a_SOURCES += $(extraSOURCES) libaiori_a_CPPFLAGS = $(extraCPPFLAGS) - - -TESTS = testlib -bin_PROGRAMS += testlib - -testlib_SOURCES = ./test/lib.c -testlib_LDFLAGS = $(extraLDFLAGS) -testlib_LDADD = libaiori.a $(extraLDADD) diff --git a/src/aiori-CEPHFS.c b/src/aiori-CEPHFS.c new file mode 100755 index 00000000..27f12dbc --- /dev/null +++ b/src/aiori-CEPHFS.c @@ -0,0 +1,385 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/******************************************************************************\ +* * +* (C) 2015 The University of Chicago * +* (C) 2020 Red Hat, Inc. * +* * +* See COPYRIGHT in top-level directory. * +* * +******************************************************************************** +* +* Implement abstract I/O interface for CEPHFS. 
+* +\******************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include "ior.h" +#include "iordef.h" +#include "aiori.h" +#include "utilities.h" + +#define CEPH_O_RDONLY 00000000 +#define CEPH_O_WRONLY 00000001 +#define CEPH_O_RDWR 00000002 +#define CEPH_O_CREAT 00000100 +#define CEPH_O_EXCL 00000200 +#define CEPH_O_TRUNC 00001000 +#define CEPH_O_LAZY 00020000 +#define CEPH_O_DIRECTORY 00200000 +#define CEPH_O_NOFOLLOW 00400000 + +/************************** O P T I O N S *****************************/ +struct cephfs_options{ + char * user; + char * conf; + char * prefix; +}; + +static struct cephfs_options o = { + .user = NULL, + .conf = NULL, + .prefix = NULL, +}; + +static option_help options [] = { + {0, "cephfs.user", "Username for the ceph cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.user}, + {0, "cephfs.conf", "Config file for the ceph cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.conf}, + {0, "cephfs.prefix", "mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, + LAST_OPTION +}; + +static struct ceph_mount_info *cmount; + +/**************************** P R O T O T Y P E S *****************************/ +static void CEPHFS_Init(); +static void CEPHFS_Final(); +static void *CEPHFS_Create(char *, IOR_param_t *); +static void *CEPHFS_Open(char *, IOR_param_t *); +static IOR_offset_t CEPHFS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void CEPHFS_Close(void *, IOR_param_t *); +static void CEPHFS_Delete(char *, IOR_param_t *); +static void CEPHFS_Fsync(void *, IOR_param_t *); +static IOR_offset_t CEPHFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int CEPHFS_StatFS(const char *, ior_aiori_statfs_t *, IOR_param_t *); +static int CEPHFS_MkDir(const char *, mode_t, IOR_param_t *); +static int CEPHFS_RmDir(const char *, IOR_param_t *); +static int CEPHFS_Access(const char *, int, IOR_param_t *); +static int 
CEPHFS_Stat(const char *, struct stat *, IOR_param_t *); +static void CEPHFS_Sync(IOR_param_t *); +static option_help * CEPHFS_options(); + +/************************** D E C L A R A T I O N S ***************************/ +ior_aiori_t cephfs_aiori = { + .name = "CEPHFS", + .name_legacy = NULL, + .initialize = CEPHFS_Init, + .finalize = CEPHFS_Final, + .create = CEPHFS_Create, + .open = CEPHFS_Open, + .xfer = CEPHFS_Xfer, + .close = CEPHFS_Close, + .delete = CEPHFS_Delete, + .get_version = aiori_get_version, + .fsync = CEPHFS_Fsync, + .get_file_size = CEPHFS_GetFileSize, + .statfs = CEPHFS_StatFS, + .mkdir = CEPHFS_MkDir, + .rmdir = CEPHFS_RmDir, + .access = CEPHFS_Access, + .stat = CEPHFS_Stat, + .sync = CEPHFS_Sync, + .get_options = CEPHFS_options, +}; + +#define CEPHFS_ERR(__err_str, __ret) do { \ + errno = -__ret; \ + ERR(__err_str); \ +} while(0) + +/***************************** F U N C T I O N S ******************************/ +static const char* pfix(const char* path) { + const char* npath = path; + const char* prefix = o.prefix; + while (*prefix) { + if(*prefix++ != *npath++) { + return path; + } + } + return npath; +} + +static option_help * CEPHFS_options(){ + return options; +} + +static void CEPHFS_Init() +{ + /* Short circuit if the options haven't been filled yet. 
*/ + if (!o.user || !o.conf || !o.prefix) { + WARN("CEPHFS_Init() called before options have been populated!"); + return; + } + + /* Short circuit if the mount handle already exists */ + if (cmount) { + return; + } + + int ret; + /* create CEPHFS mount handle */ + ret = ceph_create(&cmount, o.user); + if (ret) { + CEPHFS_ERR("unable to create CEPHFS mount handle", ret); + } + + /* set the handle using the Ceph config */ + ret = ceph_conf_read_file(cmount, o.conf); + if (ret) { + CEPHFS_ERR("unable to read ceph config file", ret); + } + + /* mount the handle */ + ret = ceph_mount(cmount, "/"); + if (ret) { + CEPHFS_ERR("unable to mount cephfs", ret); + ceph_shutdown(cmount); + + } + + Inode *root; + + /* try retrieving the root cephfs inode */ + ret = ceph_ll_lookup_root(cmount, &root); + if (ret) { + CEPHFS_ERR("uanble to retrieve root cephfs inode", ret); + ceph_shutdown(cmount); + + } + + return; +} + +static void CEPHFS_Final() +{ + /* shutdown */ + int ret = ceph_unmount(cmount); + if (ret < 0) { + CEPHFS_ERR("ceph_umount failed", ret); + } + ret = ceph_release(cmount); + if (ret < 0) { + CEPHFS_ERR("ceph_release failed", ret); + } + cmount = NULL; +} + +static void *CEPHFS_Create(char *testFileName, IOR_param_t * param) +{ + return CEPHFS_Open(testFileName, param); +} + +static void *CEPHFS_Open(char *testFileName, IOR_param_t * param) +{ + const char *file = pfix(testFileName); + int* fd; + fd = (int *)malloc(sizeof(int)); + + mode_t mode = 0664; + int flags = (int) 0; + + /* set IOR file flags to CephFS flags */ + /* -- file open flags -- */ + if (param->openFlags & IOR_RDONLY) { + flags |= CEPH_O_RDONLY; + } + if (param->openFlags & IOR_WRONLY) { + flags |= CEPH_O_WRONLY; + } + if (param->openFlags & IOR_RDWR) { + flags |= CEPH_O_RDWR; + } + if (param->openFlags & IOR_APPEND) { + fprintf(stdout, "File append not implemented in CephFS\n"); + } + if (param->openFlags & IOR_CREAT) { + flags |= CEPH_O_CREAT; + } + if (param->openFlags & IOR_EXCL) { + flags |= 
CEPH_O_EXCL; + } + if (param->openFlags & IOR_TRUNC) { + flags |= CEPH_O_TRUNC; + } + if (param->openFlags & IOR_DIRECT) { + fprintf(stdout, "O_DIRECT not implemented in CephFS\n"); + } + *fd = ceph_open(cmount, file, flags, mode); + if (*fd < 0) { + CEPHFS_ERR("ceph_open failed", *fd); + } + return (void *) fd; +} + +static IOR_offset_t CEPHFS_Xfer(int access, void *file, IOR_size_t * buffer, + IOR_offset_t length, IOR_param_t * param) +{ + uint64_t size = (uint64_t) length; + char *buf = (char *) buffer; + int fd = *(int *) file; + int ret; + + if (access == WRITE) + { + ret = ceph_write(cmount, fd, buf, size, param->offset); + if (ret < 0) { + CEPHFS_ERR("unable to write file to CephFS", ret); + } else if (ret < size) { + CEPHFS_ERR("short write to CephFS", ret); + } + if (param->fsyncPerWrite == TRUE) { + CEPHFS_Fsync(&fd, param); + } + } + else /* READ */ + { + ret = ceph_read(cmount, fd, buf, size, param->offset); + if (ret < 0) { + CEPHFS_ERR("unable to read file from CephFS", ret); + } else if (ret < size) { + CEPHFS_ERR("short read from CephFS", ret); + } + + } + return length; +} + +static void CEPHFS_Fsync(void *file, IOR_param_t * param) +{ + int fd = *(int *) file; + int ret = ceph_fsync(cmount, fd, 0); + if (ret < 0) { + CEPHFS_ERR("ceph_fsync failed", ret); + } +} + +static void CEPHFS_Close(void *file, IOR_param_t * param) +{ + int fd = *(int *) file; + int ret = ceph_close(cmount, fd); + if (ret < 0) { + CEPHFS_ERR("ceph_close failed", ret); + } + free(file); + return; +} + +static void CEPHFS_Delete(char *testFileName, IOR_param_t * param) +{ + int ret = ceph_unlink(cmount, pfix(testFileName)); + if (ret < 0) { + CEPHFS_ERR("ceph_unlink failed", ret); + } + return; +} + +static IOR_offset_t CEPHFS_GetFileSize(IOR_param_t * param, MPI_Comm testComm, + char *testFileName) +{ + struct stat stat_buf; + IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; + + int ret = ceph_stat(cmount, pfix(testFileName), &stat_buf); + if (ret < 0) { + 
CEPHFS_ERR("ceph_stat failed", ret); + } + aggFileSizeFromStat = stat_buf.st_size; + + if (param->filePerProc == TRUE) { + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, testComm), + "cannot total data moved"); + aggFileSizeFromStat = tmpSum; + } else { + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, + MPI_LONG_LONG_INT, MPI_MIN, testComm), + "cannot total data moved"); + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, + MPI_LONG_LONG_INT, MPI_MAX, testComm), + "cannot total data moved"); + if (tmpMin != tmpMax) { + if (rank == 0) { + WARN("inconsistent file size by different tasks"); + } + /* incorrect, but now consistent across tasks */ + aggFileSizeFromStat = tmpMin; + } + } + + return (aggFileSizeFromStat); + +} + +static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, + IOR_param_t *param) +{ +#if defined(HAVE_STATVFS) + struct statvfs statfs_buf; + int ret = ceph_statfs(cmount, pfix(path), &statfs_buf); + if (ret < 0) { + CEPHFS_ERR("ceph_statfs failed", ret); + return -1; + } + + stat_buf->f_bsize = statfs_buf.f_bsize; + stat_buf->f_blocks = statfs_buf.f_blocks; + stat_buf->f_bfree = statfs_buf.f_bfree; + stat_buf->f_files = statfs_buf.f_files; + stat_buf->f_ffree = statfs_buf.f_ffree; + + return 0; +#else + WARN("ceph_statfs requires statvfs!"); + return -1; +#endif +} + +static int CEPHFS_MkDir(const char *path, mode_t mode, IOR_param_t *param) +{ + return ceph_mkdir(cmount, pfix(path), mode); +} + +static int CEPHFS_RmDir(const char *path, IOR_param_t *param) +{ + return ceph_rmdir(cmount, pfix(path)); +} + +static int CEPHFS_Access(const char *testFileName, int mode, IOR_param_t *param) +{ + struct stat buf; + return ceph_stat(cmount, pfix(testFileName), &buf); +} + +static int CEPHFS_Stat(const char *testFileName, struct stat *buf, IOR_param_t *param) +{ + return ceph_stat(cmount, pfix(testFileName), buf); +} + +static void CEPHFS_Sync(IOR_param_t *param) +{ + int ret = 
ceph_sync_fs(cmount); + if (ret < 0) { + CEPHFS_ERR("ceph_sync_fs failed", ret); + } + +} diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c new file mode 100644 index 00000000..8fa15786 --- /dev/null +++ b/src/aiori-DAOS.c @@ -0,0 +1,542 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/* + * Copyright (C) 2018-2020 Intel Corporation + * See the file COPYRIGHT for a complete copyright notice and license. + */ + +/* + * This file implements the abstract I/O interface for DAOS Array API. + */ + +#define _BSD_SOURCE + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ior.h" +#include "aiori.h" +#include "iordef.h" + +/************************** O P T I O N S *****************************/ +struct daos_options{ + char *pool; + char *svcl; + char *group; + char *cont; + int chunk_size; + int destroy; + char *oclass; +}; + +static struct daos_options o = { + .pool = NULL, + .svcl = NULL, + .group = NULL, + .cont = NULL, + .chunk_size = 1048576, + .destroy = 0, + .oclass = NULL, +}; + +static option_help options [] = { + {0, "daos.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o.pool}, + {0, "daos.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', &o.svcl}, + {0, "daos.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.group}, + {0, "daos.cont", "container uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o.cont}, + {0, "daos.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, + {0, "daos.destroy", "Destroy Container", OPTION_FLAG, 'd', &o.destroy}, + {0, "daos.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, + LAST_OPTION +}; + +/**************************** P R O T O T Y P E S *****************************/ + +static void DAOS_Init(); +static void DAOS_Fini(); +static void *DAOS_Create(char *, IOR_param_t *); +static void *DAOS_Open(char *, IOR_param_t *); 
+static int DAOS_Access(const char *, int, IOR_param_t *); +static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void DAOS_Close(void *, IOR_param_t *); +static void DAOS_Delete(char *, IOR_param_t *); +static char* DAOS_GetVersion(); +static void DAOS_Fsync(void *, IOR_param_t *); +static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static option_help * DAOS_options(); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t daos_aiori = { + .name = "DAOS", + .create = DAOS_Create, + .open = DAOS_Open, + .access = DAOS_Access, + .xfer = DAOS_Xfer, + .close = DAOS_Close, + .delete = DAOS_Delete, + .get_version = DAOS_GetVersion, + .fsync = DAOS_Fsync, + .get_file_size = DAOS_GetFileSize, + .initialize = DAOS_Init, + .finalize = DAOS_Fini, + .get_options = DAOS_options, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .stat = aiori_posix_stat, +}; + +#define IOR_DAOS_MUR_SEED 0xDEAD10CC + +enum handleType { + POOL_HANDLE, + CONT_HANDLE, + ARRAY_HANDLE +}; + +static daos_handle_t poh; +static daos_handle_t coh; +static daos_handle_t aoh; +static daos_oclass_id_t objectClass = OC_SX; +static bool daos_initialized = false; + +/***************************** F U N C T I O N S ******************************/ + +/* For DAOS methods. */ +#define DCHECK(rc, format, ...) \ +do { \ + int _rc = (rc); \ + \ + if (_rc < 0) { \ + fprintf(stderr, "ior ERROR (%s:%d): %d: %d: " \ + format"\n", __FILE__, __LINE__, rank, _rc, \ + ##__VA_ARGS__); \ + fflush(stdout); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ + } \ +} while (0) + +#define INFO(level, format, ...) \ +do { \ + if (verbose >= level) \ + printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ +} while (0) + +/* For generic errors like invalid command line options. */ +#define GERR(format, ...) 
\ +do { \ + fprintf(stderr, format"\n", ##__VA_ARGS__); \ + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ +} while (0) + +/* Distribute process 0's pool or container handle to others. */ +static void +HandleDistribute(daos_handle_t *handle, enum handleType type) +{ + d_iov_t global; + int rc; + + global.iov_buf = NULL; + global.iov_buf_len = 0; + global.iov_len = 0; + + if (rank == 0) { + /* Get the global handle size. */ + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else if (type == CONT_HANDLE) + rc = daos_cont_local2global(*handle, &global); + else + rc = daos_array_local2global(*handle, &global); + DCHECK(rc, "Failed to get global handle size"); + } + + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, + MPI_COMM_WORLD), + "Failed to bcast global handle buffer size"); + + global.iov_len = global.iov_buf_len; + global.iov_buf = malloc(global.iov_buf_len); + if (global.iov_buf == NULL) + ERR("Failed to allocate global handle buffer"); + + if (rank == 0) { + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else if (type == CONT_HANDLE) + rc = daos_cont_local2global(*handle, &global); + else + rc = daos_array_local2global(*handle, &global); + DCHECK(rc, "Failed to create global handle"); + } + + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, + MPI_COMM_WORLD), + "Failed to bcast global pool handle"); + + if (rank != 0) { + if (type == POOL_HANDLE) + rc = daos_pool_global2local(global, handle); + else if (type == CONT_HANDLE) + rc = daos_cont_global2local(poh, global, handle); + else + rc = daos_array_global2local(coh, global, 0, handle); + DCHECK(rc, "Failed to get local handle"); + } + + free(global.iov_buf); +} + +static option_help * +DAOS_options() +{ + return options; +} + +static void +DAOS_Init() +{ + int rc; + + if (daos_initialized) + return; + + if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) + return; + + if (o.oclass) { + objectClass = 
daos_oclass_name2id(o.oclass); + if (objectClass == OC_UNKNOWN) + GERR("Invalid DAOS Object class %s\n", o.oclass); + } + + rc = daos_init(); + if (rc) + DCHECK(rc, "Failed to initialize daos"); + + if (rank == 0) { + uuid_t uuid; + d_rank_list_t *svcl = NULL; + static daos_pool_info_t po_info; + static daos_cont_info_t co_info; + + INFO(VERBOSE_1, "Connecting to pool %s", o.pool); + + rc = uuid_parse(o.pool, uuid); + DCHECK(rc, "Failed to parse 'pool': %s", o.pool); + + svcl = daos_rank_list_parse(o.svcl, ":"); + if (svcl == NULL) + ERR("Failed to allocate svcl"); + + rc = daos_pool_connect(uuid, o.group, svcl, DAOS_PC_RW, + &poh, &po_info, NULL); + d_rank_list_free(svcl); + DCHECK(rc, "Failed to connect to pool %s", o.pool); + + INFO(VERBOSE_1, "Create/Open Container %s", o.cont); + + uuid_clear(uuid); + rc = uuid_parse(o.cont, uuid); + DCHECK(rc, "Failed to parse 'cont': %s", o.cont); + + rc = daos_cont_open(poh, uuid, DAOS_COO_RW, &coh, &co_info, + NULL); + /* If NOEXIST we create it */ + if (rc == -DER_NONEXIST) { + INFO(VERBOSE_2, "Creating DAOS Container...\n"); + rc = daos_cont_create(poh, uuid, NULL, NULL); + if (rc == 0) + rc = daos_cont_open(poh, uuid, DAOS_COO_RW, + &coh, &co_info, NULL); + } + DCHECK(rc, "Failed to create container"); + } + + HandleDistribute(&poh, POOL_HANDLE); + HandleDistribute(&coh, CONT_HANDLE); + aoh.cookie = 0; + + daos_initialized = true; +} + +static void +DAOS_Fini() +{ + int rc; + + if (!daos_initialized) + return; + + MPI_Barrier(MPI_COMM_WORLD); + rc = daos_cont_close(coh, NULL); + if (rc) { + DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + MPI_Barrier(MPI_COMM_WORLD); + + if (o.destroy) { + if (rank == 0) { + uuid_t uuid; + double t1, t2; + + INFO(VERBOSE_1, "Destroying DAOS Container %s", o.cont); + uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); + rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0) + INFO(VERBOSE_1, "Container Destroy time = %f 
secs", t2-t1); + } + + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rc) { + if (rank == 0) + DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + + if (rank == 0) + INFO(VERBOSE_1, "Disconnecting from DAOS POOL.."); + + rc = daos_pool_disconnect(poh, NULL); + DCHECK(rc, "Failed to disconnect from pool %s", o.pool); + + MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + if (rank == 0) + INFO(VERBOSE_1, "Finalizing DAOS.."); + + rc = daos_fini(); + DCHECK(rc, "Failed to finalize daos"); + + daos_initialized = false; +} + +static void +gen_oid(const char *name, daos_obj_id_t *oid) +{ + + oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); + oid->hi = 0; + + daos_array_generate_id(oid, objectClass, true, 0); +} + +static void * +DAOS_Create(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + int rc; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** Create the array */ + if (param->filePerProc || rank == 0) { + rc = daos_array_create(coh, oid, DAOS_TX_NONE, 1, o.chunk_size, + &aoh, NULL); + DCHECK(rc, "Failed to create array object\n"); + } + + /** Distribute the array handle if not FPP */ + if (!param->filePerProc) + HandleDistribute(&aoh, ARRAY_HANDLE); + + return &aoh; +} + +static int +DAOS_Access(const char *testFileName, int mode, IOR_param_t * param) +{ + daos_obj_id_t oid; + daos_size_t cell_size, chunk_size; + int rc; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + if (rc) + return rc; + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_close(aoh, NULL); + aoh.cookie = 0; + return rc; +} + +static void * +DAOS_Open(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** Open the array 
*/ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + int rc; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "Failed to create array object\n"); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + } + + /** Distribute the array handle if not FPP */ + if (!param->filePerProc) + HandleDistribute(&aoh, ARRAY_HANDLE); + + return &aoh; +} + +static IOR_offset_t +DAOS_Xfer(int access, void *file, IOR_size_t *buffer, + IOR_offset_t length, IOR_param_t *param) +{ + daos_array_iod_t iod; + daos_range_t rg; + d_sg_list_t sgl; + d_iov_t iov; + int rc; + + /** set array location */ + iod.arr_nr = 1; + rg.rg_len = length; + rg.rg_idx = param->offset; + iod.arr_rgs = &rg; + + /** set memory location */ + sgl.sg_nr = 1; + d_iov_set(&iov, buffer, length); + sgl.sg_iovs = &iov; + + if (access == WRITE) { + rc = daos_array_write(aoh, DAOS_TX_NONE, &iod, &sgl, NULL); + DCHECK(rc, "daos_array_write() failed (%d).", rc); + } else { + rc = daos_array_read(aoh, DAOS_TX_NONE, &iod, &sgl, NULL); + DCHECK(rc, "daos_array_read() failed (%d).", rc); + } + + return length; +} + +static void +DAOS_Close(void *file, IOR_param_t *param) +{ + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + + aoh.cookie = 0; +} + +static void +DAOS_Delete(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + daos_size_t cell_size, chunk_size; + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** open the array to verify it exists */ + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_destroy(aoh, 
DAOS_TX_NONE, NULL); + DCHECK(rc, "daos_array_destroy() failed (%d).", rc); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; +} + +static char * +DAOS_GetVersion() +{ + static char ver[1024] = {}; + + sprintf(ver, "%s", "DAOS"); + return ver; +} + +static void +DAOS_Fsync(void *file, IOR_param_t *param) +{ + return; +} + +static IOR_offset_t +DAOS_GetFileSize(IOR_param_t *param, MPI_Comm testComm, char *testFileName) +{ + daos_obj_id_t oid; + daos_size_t size; + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** open the array to verify it exists */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_get_size(aoh, DAOS_TX_NONE, &size, NULL); + DCHECK(rc, "daos_array_get_size() failed (%d).", rc); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; + } + + if (!param->filePerProc) + MPI_Bcast(&size, 1, MPI_LONG, 0, MPI_COMM_WORLD); + + return size; +} diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c new file mode 100755 index 00000000..e7b1d6b9 --- /dev/null +++ b/src/aiori-DFS.c @@ -0,0 +1,900 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/* + * Copyright (C) 2018-2020 Intel Corporation + * See the file COPYRIGHT for a complete copyright notice and license. + */ + +/* + * This file implements the abstract I/O interface for DAOS FS API. 
+ */ + +#define _BSD_SOURCE + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ior.h" +#include "iordef.h" +#include "aiori.h" +#include "utilities.h" + +dfs_t *dfs; +static daos_handle_t poh, coh; +static daos_oclass_id_t objectClass = OC_SX; +static daos_oclass_id_t dir_oclass = OC_SX; +static struct d_hash_table *dir_hash; +static bool dfs_init; + +struct aiori_dir_hdl { + d_list_t entry; + dfs_obj_t *oh; + char name[PATH_MAX]; +}; + +enum handleType { + POOL_HANDLE, + CONT_HANDLE, + DFS_HANDLE +}; + +/************************** O P T I O N S *****************************/ +struct dfs_options{ + char *pool; + char *svcl; + char *group; + char *cont; + int chunk_size; + char *oclass; + char *dir_oclass; + char *prefix; + int destroy; +}; + +static struct dfs_options o = { + .pool = NULL, + .svcl = NULL, + .group = NULL, + .cont = NULL, + .chunk_size = 1048576, + .oclass = NULL, + .dir_oclass = NULL, + .prefix = NULL, + .destroy = 0, +}; + +static option_help options [] = { + {0, "dfs.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', & o.pool}, + {0, "dfs.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', & o.svcl}, + {0, "dfs.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group}, + {0, "dfs.cont", "DFS container uuid", OPTION_OPTIONAL_ARGUMENT, 's', & o.cont}, + {0, "dfs.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, + {0, "dfs.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, + {0, "dfs.dir_oclass", "directory object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.dir_oclass}, + {0, "dfs.prefix", "mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, + {0, "dfs.destroy", "Destroy DFS Container", OPTION_FLAG, 'd', &o.destroy}, + LAST_OPTION +}; + +/**************************** P R O T O T Y P E S *****************************/ +static void 
*DFS_Create(char *, IOR_param_t *); +static void *DFS_Open(char *, IOR_param_t *); +static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void DFS_Close(void *, IOR_param_t *); +static void DFS_Delete(char *, IOR_param_t *); +static char* DFS_GetVersion(); +static void DFS_Fsync(void *, IOR_param_t *); +static void DFS_Sync(IOR_param_t *); +static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *); +static int DFS_Stat (const char *, struct stat *, IOR_param_t *); +static int DFS_Mkdir (const char *, mode_t, IOR_param_t *); +static int DFS_Rmdir (const char *, IOR_param_t *); +static int DFS_Access (const char *, int, IOR_param_t *); +static void DFS_Init(); +static void DFS_Finalize(); +static option_help * DFS_options(); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t dfs_aiori = { + .name = "DFS", + .create = DFS_Create, + .open = DFS_Open, + .xfer = DFS_Xfer, + .close = DFS_Close, + .delete = DFS_Delete, + .get_version = DFS_GetVersion, + .fsync = DFS_Fsync, + .sync = DFS_Sync, + .get_file_size = DFS_GetFileSize, + .statfs = DFS_Statfs, + .mkdir = DFS_Mkdir, + .rmdir = DFS_Rmdir, + .access = DFS_Access, + .stat = DFS_Stat, + .initialize = DFS_Init, + .finalize = DFS_Finalize, + .get_options = DFS_options, + .enable_mdtest = true, +}; + +/***************************** F U N C T I O N S ******************************/ + +/* For DAOS methods. */ +#define DCHECK(rc, format, ...) \ +do { \ + int _rc = (rc); \ + \ + if (_rc != 0) { \ + fprintf(stderr, "ERROR (%s:%d): %d: %d: " \ + format"\n", __FILE__, __LINE__, rank, _rc, \ + ##__VA_ARGS__); \ + fflush(stderr); \ + exit(-1); \ + } \ +} while (0) + +#define INFO(level, format, ...) \ +do { \ + if (verbose >= level) \ + printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ +} while (0) + +#define GERR(format, ...) 
\ +do { \ + fprintf(stderr, format"\n", ##__VA_ARGS__); \ + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ +} while (0) + +static inline struct aiori_dir_hdl * +hdl_obj(d_list_t *rlink) +{ + return container_of(rlink, struct aiori_dir_hdl, entry); +} + +static bool +key_cmp(struct d_hash_table *htable, d_list_t *rlink, + const void *key, unsigned int ksize) +{ + struct aiori_dir_hdl *hdl = hdl_obj(rlink); + + return (strcmp(hdl->name, (const char *)key) == 0); +} + +static void +rec_free(struct d_hash_table *htable, d_list_t *rlink) +{ + struct aiori_dir_hdl *hdl = hdl_obj(rlink); + + assert(d_hash_rec_unlinked(&hdl->entry)); + dfs_release(hdl->oh); + free(hdl); +} + +static d_hash_table_ops_t hdl_hash_ops = { + .hop_key_cmp = key_cmp, + .hop_rec_free = rec_free +}; + +/* Distribute process 0's pool or container handle to others. */ +static void +HandleDistribute(enum handleType type) +{ + d_iov_t global; + int rc; + + global.iov_buf = NULL; + global.iov_buf_len = 0; + global.iov_len = 0; + + assert(type == POOL_HANDLE || type == CONT_HANDLE || type == DFS_HANDLE); + if (rank == 0) { + /* Get the global handle size. 
*/ + if (type == POOL_HANDLE) + rc = daos_pool_local2global(poh, &global); + else if (type == CONT_HANDLE) + rc = daos_cont_local2global(coh, &global); + else + rc = dfs_local2global(dfs, &global); + DCHECK(rc, "Failed to get global handle size"); + } + + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, + MPI_COMM_WORLD), + "Failed to bcast global handle buffer size"); + + global.iov_len = global.iov_buf_len; + global.iov_buf = malloc(global.iov_buf_len); + if (global.iov_buf == NULL) + ERR("Failed to allocate global handle buffer"); + + if (rank == 0) { + if (type == POOL_HANDLE) + rc = daos_pool_local2global(poh, &global); + else if (type == CONT_HANDLE) + rc = daos_cont_local2global(coh, &global); + else + rc = dfs_local2global(dfs, &global); + DCHECK(rc, "Failed to create global handle"); + } + + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, + MPI_COMM_WORLD), + "Failed to bcast global pool handle"); + + if (rank != 0) { + if (type == POOL_HANDLE) + rc = daos_pool_global2local(global, &poh); + else if (type == CONT_HANDLE) + rc = daos_cont_global2local(poh, global, &coh); + else + rc = dfs_global2local(poh, coh, 0, global, &dfs); + DCHECK(rc, "Failed to get local handle"); + } + + free(global.iov_buf); +} + +static int +parse_filename(const char *path, char **_obj_name, char **_cont_name) +{ + char *f1 = NULL; + char *f2 = NULL; + char *fname = NULL; + char *cont_name = NULL; + int rc = 0; + + if (path == NULL || _obj_name == NULL || _cont_name == NULL) + return -EINVAL; + + if (strcmp(path, "/") == 0) { + *_cont_name = strdup("/"); + if (*_cont_name == NULL) + return -ENOMEM; + *_obj_name = NULL; + return 0; + } + + f1 = strdup(path); + if (f1 == NULL) { + rc = -ENOMEM; + goto out; + } + + f2 = strdup(path); + if (f2 == NULL) { + rc = -ENOMEM; + goto out; + } + + fname = basename(f1); + cont_name = dirname(f2); + + if (cont_name[0] == '.' 
|| cont_name[0] != '/') { + char cwd[1024]; + + if (getcwd(cwd, 1024) == NULL) { + rc = -ENOMEM; + goto out; + } + + if (strcmp(cont_name, ".") == 0) { + cont_name = strdup(cwd); + if (cont_name == NULL) { + rc = -ENOMEM; + goto out; + } + } else { + char *new_dir = calloc(strlen(cwd) + strlen(cont_name) + + 1, sizeof(char)); + if (new_dir == NULL) { + rc = -ENOMEM; + goto out; + } + + strcpy(new_dir, cwd); + if (cont_name[0] == '.') { + strcat(new_dir, &cont_name[1]); + } else { + strcat(new_dir, "/"); + strcat(new_dir, cont_name); + } + cont_name = new_dir; + } + *_cont_name = cont_name; + } else { + *_cont_name = strdup(cont_name); + if (*_cont_name == NULL) { + rc = -ENOMEM; + goto out; + } + } + + *_obj_name = strdup(fname); + if (*_obj_name == NULL) { + free(*_cont_name); + *_cont_name = NULL; + rc = -ENOMEM; + goto out; + } + +out: + if (f1) + free(f1); + if (f2) + free(f2); + return rc; +} + +static dfs_obj_t * +lookup_insert_dir(const char *name, mode_t *mode) +{ + struct aiori_dir_hdl *hdl; + d_list_t *rlink; + int rc; + + rlink = d_hash_rec_find(dir_hash, name, strlen(name)); + if (rlink != NULL) { + hdl = hdl_obj(rlink); + return hdl->oh; + } + + hdl = calloc(1, sizeof(struct aiori_dir_hdl)); + if (hdl == NULL) + GERR("failed to alloc dir handle"); + + strncpy(hdl->name, name, PATH_MAX-1); + hdl->name[PATH_MAX-1] = '\0'; + + rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, mode, NULL); + if (rc) + return NULL; + if (mode && S_ISREG(*mode)) + return hdl->oh; + + rc = d_hash_rec_insert(dir_hash, hdl->name, strlen(hdl->name), + &hdl->entry, true); + DCHECK(rc, "Failed to insert dir handle in hashtable"); + + return hdl->oh; +} + +static option_help * DFS_options(){ + return options; +} + +static void +DFS_Init() { + int rc; + + /** in case we are already initialized, return */ + if (dfs_init) + return; + + /** shouldn't be fatal since it can be called with POSIX backend selection */ + if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) + return; + + rc = 
daos_init(); + DCHECK(rc, "Failed to initialize daos"); + + if (o.oclass) { + objectClass = daos_oclass_name2id(o.oclass); + if (objectClass == OC_UNKNOWN) + GERR("Invalid DAOS object class %s\n", o.oclass); + } + + if (o.dir_oclass) { + dir_oclass = daos_oclass_name2id(o.dir_oclass); + if (dir_oclass == OC_UNKNOWN) + GERR("Invalid DAOS directory object class %s\n", o.dir_oclass); + } + + rc = d_hash_table_create(0, 16, NULL, &hdl_hash_ops, &dir_hash); + DCHECK(rc, "Failed to initialize dir hashtable"); + + if (rank == 0) { + uuid_t pool_uuid, co_uuid; + d_rank_list_t *svcl = NULL; + daos_pool_info_t pool_info; + daos_cont_info_t co_info; + + rc = uuid_parse(o.pool, pool_uuid); + DCHECK(rc, "Failed to parse 'Pool uuid': %s", o.pool); + + rc = uuid_parse(o.cont, co_uuid); + DCHECK(rc, "Failed to parse 'Cont uuid': %s", o.cont); + + svcl = daos_rank_list_parse(o.svcl, ":"); + if (svcl == NULL) + ERR("Failed to allocate svcl"); + + INFO(VERBOSE_1, "Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl); + INFO(VERBOSE_1, "DFS Container namespace uuid = %s\n", o.cont); + + /** Connect to DAOS pool */ + rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, + &poh, &pool_info, NULL); + d_rank_list_free(svcl); + DCHECK(rc, "Failed to connect to pool"); + + rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, + NULL); + /* If NOEXIST we create it */ + if (rc == -DER_NONEXIST) { + INFO(VERBOSE_1, "Creating DFS Container ...\n"); + + rc = dfs_cont_create(poh, co_uuid, NULL, &coh, NULL); + if (rc) + DCHECK(rc, "Failed to create container"); + } else if (rc) { + DCHECK(rc, "Failed to create container"); + } + + rc = dfs_mount(poh, coh, O_RDWR, &dfs); + DCHECK(rc, "Failed to mount DFS namespace"); + } + + HandleDistribute(POOL_HANDLE); + HandleDistribute(CONT_HANDLE); + HandleDistribute(DFS_HANDLE); + + if (o.prefix) { + rc = dfs_set_prefix(dfs, o.prefix); + DCHECK(rc, "Failed to set DFS Prefix"); + } + dfs_init = true; +} + +static void +DFS_Finalize() +{ + int rc; + + 
MPI_Barrier(MPI_COMM_WORLD); + d_hash_table_destroy(dir_hash, true /* force */); + + rc = dfs_umount(dfs); + DCHECK(rc, "Failed to umount DFS namespace"); + MPI_Barrier(MPI_COMM_WORLD); + + rc = daos_cont_close(coh, NULL); + DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); + MPI_Barrier(MPI_COMM_WORLD); + + if (o.destroy) { + if (rank == 0) { + uuid_t uuid; + double t1, t2; + + INFO(VERBOSE_1, "Destroying DFS Container: %s\n", o.cont); + uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); + rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0) + INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); + } + + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rc) { + if (rank == 0) + DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + + if (rank == 0) + INFO(VERBOSE_1, "Disconnecting from DAOS POOL\n"); + + rc = daos_pool_disconnect(poh, NULL); + DCHECK(rc, "Failed to disconnect from pool"); + + MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + + if (rank == 0) + INFO(VERBOSE_1, "Finalizing DAOS..\n"); + + rc = daos_fini(); + DCHECK(rc, "Failed to finalize DAOS"); + + /** reset tunables */ + o.pool = NULL; + o.svcl = NULL; + o.group = NULL; + o.cont = NULL; + o.chunk_size = 1048576; + o.oclass = NULL; + o.dir_oclass = NULL; + o.prefix = NULL; + o.destroy = 0; + objectClass = OC_SX; + dir_oclass = OC_SX; + dfs_init = false; +} + +/* + * Creat and open a file through the DFS interface. 
+ */ +static void * +DFS_Create(char *testFileName, IOR_param_t *param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *obj = NULL, *parent = NULL; + mode_t mode = 0644; + int fd_oflag = 0; + int rc; + + assert(param); + + rc = parse_filename(testFileName, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", testFileName); + assert(dir_name); + assert(name); + + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + mode = S_IFREG | param->mode; + if (param->filePerProc || rank == 0) { + fd_oflag |= O_CREAT | O_RDWR | O_EXCL; + + rc = dfs_open(dfs, parent, name, mode, fd_oflag, + objectClass, o.chunk_size, NULL, &obj); + DCHECK(rc, "dfs_open() of %s Failed", name); + } + if (!param->filePerProc) { + MPI_Barrier(MPI_COMM_WORLD); + if (rank != 0) { + fd_oflag |= O_RDWR; + rc = dfs_open(dfs, parent, name, mode, fd_oflag, + objectClass, o.chunk_size, NULL, &obj); + DCHECK(rc, "dfs_open() of %s Failed", name); + } + } + + if (name) + free(name); + if (dir_name) + free(dir_name); + + return ((void *)obj); +} + +/* + * Open a file through the DFS interface. + */ +static void * +DFS_Open(char *testFileName, IOR_param_t *param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *obj = NULL, *parent = NULL; + mode_t mode; + int rc; + int fd_oflag = 0; + + fd_oflag |= O_RDWR; + mode = S_IFREG | param->mode; + + rc = parse_filename(testFileName, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", testFileName); + + assert(dir_name); + assert(name); + + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, + o.chunk_size, NULL, &obj); + DCHECK(rc, "dfs_open() of %s Failed", name); + + if (name) + free(name); + if (dir_name) + free(dir_name); + + return ((void *)obj); +} + +/* + * Write or read access to file using the DFS interface. 
+ */ +static IOR_offset_t +DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, + IOR_param_t *param) +{ + int xferRetries = 0; + long long remaining = (long long)length; + char *ptr = (char *)buffer; + daos_size_t ret; + int rc; + dfs_obj_t *obj; + + obj = (dfs_obj_t *)file; + + while (remaining > 0) { + d_iov_t iov; + d_sg_list_t sgl; + + /** set memory location */ + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + d_iov_set(&iov, (void *)ptr, remaining); + sgl.sg_iovs = &iov; + + /* write/read file */ + if (access == WRITE) { + rc = dfs_write(dfs, obj, &sgl, param->offset, NULL); + if (rc) { + fprintf(stderr, "dfs_write() failed (%d)", rc); + return -1; + } + ret = remaining; + } else { + rc = dfs_read(dfs, obj, &sgl, param->offset, &ret, NULL); + if (rc || ret == 0) + fprintf(stderr, "dfs_read() failed(%d)", rc); + } + + if (ret < remaining) { + if (param->singleXferAttempt == TRUE) + exit(-1); + if (xferRetries > MAX_RETRY) + ERR("too many retries -- aborting"); + } + + assert(ret >= 0); + assert(ret <= remaining); + remaining -= ret; + ptr += ret; + xferRetries++; + } + + return (length); +} + +/* + * Perform fsync(). + */ +static void +DFS_Fsync(void *fd, IOR_param_t * param) +{ + /* no cache in DFS, so this is a no-op currently */ + dfs_sync(dfs); + return; +} + +/* + * Perform sync() on the dfs mount. + */ +static void +DFS_Sync(IOR_param_t * param) +{ + /* no cache in DFS, so this is a no-op currently */ + dfs_sync(dfs); + return; +} + +/* + * Close a file through the DFS interface. + */ +static void +DFS_Close(void *fd, IOR_param_t * param) +{ + dfs_release((dfs_obj_t *)fd); +} + +/* + * Delete a file through the DFS interface. 
+ */ +static void +DFS_Delete(char *testFileName, IOR_param_t * param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *parent = NULL; + int rc; + + rc = parse_filename(testFileName, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", testFileName); + + assert(dir_name); + assert(name); + + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + rc = dfs_remove(dfs, parent, name, false, NULL); + DCHECK(rc, "dfs_remove() of %s Failed", name); + + if (name) + free(name); + if (dir_name) + free(dir_name); +} + +static char* DFS_GetVersion() +{ + static char ver[1024] = {}; + + sprintf(ver, "%s", "DAOS"); + return ver; +} + +/* + * Use DFS stat() to return aggregate file size. + */ +static IOR_offset_t +DFS_GetFileSize(IOR_param_t * test, MPI_Comm comm, char *testFileName) +{ + dfs_obj_t *obj; + daos_size_t fsize, tmpMin, tmpMax, tmpSum; + int rc; + + rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); + if (rc) { + fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); + return -1; + } + + rc = dfs_get_size(dfs, obj, &fsize); + if (rc) + return -1; + + dfs_release(obj); + + if (test->filePerProc == TRUE) { + MPI_CHECK(MPI_Allreduce(&fsize, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, comm), + "cannot total data moved"); + fsize = tmpSum; + } else { + MPI_CHECK(MPI_Allreduce(&fsize, &tmpMin, 1, + MPI_LONG_LONG_INT, MPI_MIN, comm), + "cannot total data moved"); + MPI_CHECK(MPI_Allreduce(&fsize, &tmpMax, 1, + MPI_LONG_LONG_INT, MPI_MAX, comm), + "cannot total data moved"); + if (tmpMin != tmpMax) { + if (rank == 0) { + WARN("inconsistent file size by different tasks"); + } + /* incorrect, but now consistent across tasks */ + fsize = tmpMin; + } + } + + return (fsize); +} + +static int +DFS_Statfs(const char *path, ior_aiori_statfs_t *sfs, IOR_param_t * param) +{ + return 0; +} + +static int +DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; 
+ char *name = NULL, *dir_name = NULL; + int rc; + + rc = parse_filename(path, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", path); + + assert(dir_name); + if (!name) + return 0; + + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + rc = dfs_mkdir(dfs, parent, name, mode, dir_oclass); + DCHECK(rc, "dfs_mkdir() of %s Failed", name); + + if (name) + free(name); + if (dir_name) + free(dir_name); + return rc; +} + +static int +DFS_Rmdir(const char *path, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; + char *name = NULL, *dir_name = NULL; + int rc; + + rc = parse_filename(path, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", path); + + assert(dir_name); + assert(name); + + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + rc = dfs_remove(dfs, parent, name, false, NULL); + DCHECK(rc, "dfs_remove() of %s Failed", name); + + if (name) + free(name); + if (dir_name) + free(dir_name); + if (rc) + return -1; + return rc; +} + +static int +DFS_Access(const char *path, int mode, IOR_param_t * param) +{ + dfs_obj_t *obj = NULL; + mode_t fmode; + + obj = lookup_insert_dir(path, &fmode); + if (obj == NULL) + return -1; + + /** just close if it's a file */ + if (S_ISREG(fmode)) + dfs_release(obj); + + return 0; +} + +static int +DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; + char *name = NULL, *dir_name = NULL; + int rc; + + rc = parse_filename(path, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", path); + + assert(dir_name); + assert(name); + + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + rc = dfs_stat(dfs, parent, name, buf); + DCHECK(rc, "dfs_stat() of Failed (%d)", rc); + + if (name) + free(name); + if (dir_name) + free(dir_name); + if (rc) + return -1; + return rc; +} diff --git a/src/aiori-DUMMY.c 
b/src/aiori-DUMMY.c index 0570f0a8..f368c795 100755 --- a/src/aiori-DUMMY.c +++ b/src/aiori-DUMMY.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "ior.h" #include "aiori.h" @@ -16,29 +17,33 @@ /************************** O P T I O N S *****************************/ -struct dummy_options{ +typedef struct { uint64_t delay_creates; uint64_t delay_xfer; int delay_rank_0_only; -}; +} dummy_options_t; -static struct dummy_options o = { - .delay_creates = 0, - .delay_xfer = 0, - .delay_rank_0_only = 0, -}; +static char * current = (char*) 1; -static option_help options [] = { - {0, "dummy.delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o.delay_creates}, - {0, "dummy.delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o.delay_xfer}, - {0, "dummy.delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o.delay_rank_0_only}, - LAST_OPTION -}; +static option_help * DUMMY_options(void ** init_backend_options, void * init_values){ + dummy_options_t * o = malloc(sizeof(dummy_options_t)); + if (init_values != NULL){ + memcpy(o, init_values, sizeof(dummy_options_t)); + }else{ + memset(o, 0, sizeof(dummy_options_t)); + } -static char * current = (char*) 1; + *init_backend_options = o; -static option_help * DUMMY_options(){ - return options; + option_help h [] = { + {0, "dummy.delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_creates}, + {0, "dummy.delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_xfer}, + {0, "dummy.delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o->delay_rank_0_only}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; } static void *DUMMY_Create(char *testFileName, IOR_param_t * param) @@ -46,9 +51,11 @@ static void *DUMMY_Create(char *testFileName, IOR_param_t * param) if(verbose > 4){ fprintf(out_logfile, "DUMMY create: %s = %p\n", testFileName, current); } - if 
(o.delay_creates){ - if (! o.delay_rank_0_only || (o.delay_rank_0_only && rank == 0)){ - usleep(o.delay_creates); + dummy_options_t * o = (dummy_options_t*) param->backend_options; + if (o->delay_creates){ + if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ + struct timespec wait = { o->delay_creates / 1000 / 1000, 1000l * (o->delay_creates % 1000000)}; + nanosleep( & wait, NULL); } } return current++; @@ -69,6 +76,11 @@ static void DUMMY_Fsync(void *fd, IOR_param_t * param) } } + +static void DUMMY_Sync(IOR_param_t * param) +{ +} + static void DUMMY_Close(void *fd, IOR_param_t * param) { if(verbose > 4){ @@ -100,9 +112,11 @@ static IOR_offset_t DUMMY_Xfer(int access, void *file, IOR_size_t * buffer, IOR_ if(verbose > 4){ fprintf(out_logfile, "DUMMY xfer: %p\n", file); } - if (o.delay_xfer){ - if (! o.delay_rank_0_only || (o.delay_rank_0_only && rank == 0)){ - usleep(o.delay_xfer); + dummy_options_t * o = (dummy_options_t*) param->backend_options; + if (o->delay_xfer){ + if (! 
o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ + struct timespec wait = {o->delay_xfer / 1000 / 1000, 1000l * (o->delay_xfer % 1000000)}; + nanosleep( & wait, NULL); } } return length; @@ -134,22 +148,30 @@ static int DUMMY_stat (const char *path, struct stat *buf, IOR_param_t * param){ return 0; } +static int DUMMY_check_params(IOR_param_t * test){ + return 1; +} + ior_aiori_t dummy_aiori = { - "DUMMY", - DUMMY_Create, - DUMMY_Open, - DUMMY_Xfer, - DUMMY_Close, - DUMMY_Delete, - DUMMY_getVersion, - DUMMY_Fsync, - DUMMY_GetFileSize, - DUMMY_statfs, - DUMMY_mkdir, - DUMMY_rmdir, - DUMMY_access, - DUMMY_stat, - NULL, - NULL, - DUMMY_options + .name = "DUMMY", + .name_legacy = NULL, + .create = DUMMY_Create, + .open = DUMMY_Open, + .xfer = DUMMY_Xfer, + .close = DUMMY_Close, + .delete = DUMMY_Delete, + .get_version = DUMMY_getVersion, + .fsync = DUMMY_Fsync, + .get_file_size = DUMMY_GetFileSize, + .statfs = DUMMY_statfs, + .mkdir = DUMMY_mkdir, + .rmdir = DUMMY_rmdir, + .access = DUMMY_access, + .stat = DUMMY_stat, + .initialize = NULL, + .finalize = NULL, + .get_options = DUMMY_options, + .check_params = DUMMY_check_params, + .sync = DUMMY_Sync, + .enable_mdtest = true }; diff --git a/src/aiori-Gfarm.c b/src/aiori-Gfarm.c new file mode 100644 index 00000000..a7af0ea8 --- /dev/null +++ b/src/aiori-Gfarm.c @@ -0,0 +1,316 @@ +#include +#include +#include +#include +#include +#undef PACKAGE_NAME +#undef PACKAGE_STRING +#undef PACKAGE_TARNAME +#undef PACKAGE_VERSION +#include "ior.h" +#include "aiori.h" + +struct gfarm_file { + GFS_File gf; +}; + +void +Gfarm_initialize() +{ + gfarm_initialize(NULL, NULL); +} + +void +Gfarm_finalize() +{ + gfarm_terminate(); +} + +void * +Gfarm_create(char *fn, IOR_param_t *param) +{ + GFS_File gf; + struct gfarm_file *fp; + gfarm_error_t e; + + if (param->dryRun) + return (NULL); + + e = gfs_pio_create(fn, GFARM_FILE_RDWR, 0664, &gf); + if (e != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_create failed"); + 
GFARM_MALLOC(fp); + if (fp == NULL) + ERR("no memory"); + fp->gf = gf; + return (fp); +} + +void * +Gfarm_open(char *fn, IOR_param_t *param) +{ + GFS_File gf; + struct gfarm_file *fp; + gfarm_error_t e; + + if (param->dryRun) + return (NULL); + + e = gfs_pio_open(fn, GFARM_FILE_RDWR, &gf); + if (e != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_open failed"); + GFARM_MALLOC(fp); + if (fp == NULL) + ERR("no memory"); + fp->gf = gf; + return (fp); +} + +IOR_offset_t +Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, + IOR_param_t *param) +{ + struct gfarm_file *fp = fd; + IOR_offset_t rem = len; + gfarm_off_t off; + gfarm_error_t e; +#define MAX_SZ (1024 * 1024 * 1024) + int sz, n; + char *buf = (char *)buffer; + + if (param->dryRun) + return (len); + + if (len > MAX_SZ) + sz = MAX_SZ; + else + sz = len; + + e = gfs_pio_seek(fp->gf, param->offset, GFARM_SEEK_SET, &off); + if (e != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_seek failed"); + while (rem > 0) { + if (access == WRITE) + e = gfs_pio_write(fp->gf, buf, sz, &n); + else + e = gfs_pio_read(fp->gf, buf, sz, &n); + if (e != GFARM_ERR_NO_ERROR) + ERR("xfer failed"); + if (n == 0) + ERR("EOF encountered"); + rem -= n; + buf += n; + } + return (len); +} + +void +Gfarm_close(void *fd, IOR_param_t *param) +{ + struct gfarm_file *fp = fd; + + if (param->dryRun) + return; + + if (gfs_pio_close(fp->gf) != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_close failed"); + free(fp); +} + +void +Gfarm_delete(char *fn, IOR_param_t *param) +{ + gfarm_error_t e; + + if (param->dryRun) + return; + + e = gfs_unlink(fn); + if (e != GFARM_ERR_NO_ERROR) + errno = gfarm_error_to_errno(e); +} + +char * +Gfarm_version() +{ + return ((char *)gfarm_version()); +} + +void +Gfarm_fsync(void *fd, IOR_param_t *param) +{ + struct gfarm_file *fp = fd; + + if (param->dryRun) + return; + + if (gfs_pio_sync(fp->gf) != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_sync failed"); +} + +IOR_offset_t +Gfarm_get_file_size(IOR_param_t *param, MPI_Comm comm, char *fn) +{ + 
struct gfs_stat st; + IOR_offset_t size, sum, min, max; + + if (param->dryRun) + return (0); + + if (gfs_stat(fn, &st) != GFARM_ERR_NO_ERROR) + ERR("gfs_stat failed"); + size = st.st_size; + gfs_stat_free(&st); + + if (param->filePerProc == TRUE) { + MPI_CHECK(MPI_Allreduce(&size, &sum, 1, MPI_LONG_LONG_INT, + MPI_SUM, comm), "cannot total data moved"); + size = sum; + } else { + MPI_CHECK(MPI_Allreduce(&size, &min, 1, MPI_LONG_LONG_INT, + MPI_MIN, comm), "cannot total data moved"); + MPI_CHECK(MPI_Allreduce(&size, &max, 1, MPI_LONG_LONG_INT, + MPI_MAX, comm), "cannot total data moved"); + if (min != max) { + if (rank == 0) + WARN("inconsistent file size by different " + "tasks"); + /* incorrect, but now consistent across tasks */ + size = min; + } + } + return (size); +} + +int +Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, IOR_param_t *param) +{ + gfarm_off_t used, avail, files; + gfarm_error_t e; + int bsize = 4096; + + if (param->dryRun) + return (0); + + e = gfs_statfs_by_path(fn, &used, &avail, &files); + if (e != GFARM_ERR_NO_ERROR) { + errno = gfarm_error_to_errno(e); + return (-1); + } + st->f_bsize = bsize; + st->f_blocks = (used + avail) / bsize; + st->f_bfree = avail / bsize; + st->f_files = 2 * files; /* XXX */ + st->f_ffree = files; /* XXX */ + return (0); +} + +int +Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) +{ + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_mkdir(fn, mode); + if (e == GFARM_ERR_NO_ERROR) + return (0); + errno = gfarm_error_to_errno(e); + return (-1); +} + +int +Gfarm_rmdir(const char *fn, IOR_param_t *param) +{ + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_rmdir(fn); + if (e == GFARM_ERR_NO_ERROR) + return (0); + errno = gfarm_error_to_errno(e); + return (-1); +} + +int +Gfarm_access(const char *fn, int mode, IOR_param_t *param) +{ + struct gfs_stat st; + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_stat(fn, &st); + if (e != GFARM_ERR_NO_ERROR) { 
+ errno = gfarm_error_to_errno(e); + return (-1); + } + gfs_stat_free(&st); + return (0); +} + +/* XXX FIXME */ +#define GFS_DEV ((dev_t)-1) +#define GFS_BLKSIZE 8192 +#define STAT_BLKSIZ 512 /* for st_blocks */ + +int +Gfarm_stat(const char *fn, struct stat *buf, IOR_param_t *param) +{ + struct gfs_stat st; + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_stat(fn, &st); + if (e != GFARM_ERR_NO_ERROR) { + errno = gfarm_error_to_errno(e); + return (-1); + } + buf->st_dev = GFS_DEV; + buf->st_ino = st.st_ino; + buf->st_mode = st.st_mode; + buf->st_nlink = st.st_nlink; + buf->st_uid = getuid(); /* XXX */ + buf->st_gid = getgid(); /* XXX */ + buf->st_size = st.st_size; + buf->st_blksize = GFS_BLKSIZE; + buf->st_blocks = (st.st_size + STAT_BLKSIZ - 1) / STAT_BLKSIZ; + buf->st_atime = st.st_atimespec.tv_sec; + buf->st_mtime = st.st_mtimespec.tv_sec; + buf->st_ctime = st.st_ctimespec.tv_sec; +#if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC) + buf->st_atim.tv_nsec = st.st_atimespec.tv_nsec; + buf->st_mtim.tv_nsec = st.st_mtimespec.tv_nsec; + buf->st_ctim.tv_nsec = st.st_ctimespec.tv_nsec; +#endif + gfs_stat_free(&st); + return (0); +} + +ior_aiori_t gfarm_aiori = { + .name = "Gfarm", + .name_legacy = NULL, + .create = Gfarm_create, + .open = Gfarm_open, + .xfer = Gfarm_xfer, + .close = Gfarm_close, + .delete = Gfarm_delete, + .get_version = Gfarm_version, + .fsync = Gfarm_fsync, + .get_file_size = Gfarm_get_file_size, + .statfs = Gfarm_statfs, + .mkdir = Gfarm_mkdir, + .rmdir = Gfarm_rmdir, + .access = Gfarm_access, + .stat = Gfarm_stat, + .initialize = Gfarm_initialize, + .finalize = Gfarm_finalize, + .get_options = NULL, + .enable_mdtest = true, +}; diff --git a/src/aiori-HDF5.c b/src/aiori-HDF5.c index 173abb4b..ab329dbb 100755 --- a/src/aiori-HDF5.c +++ b/src/aiori-HDF5.c @@ -94,10 +94,39 @@ static void HDF5_Fsync(void *, IOR_param_t *); static IOR_offset_t HDF5_GetFileSize(IOR_param_t *, MPI_Comm, char *); static int HDF5_Access(const char *, int, 
IOR_param_t *); +/************************** O P T I O N S *****************************/ +typedef struct{ + int collective_md; +} HDF5_options_t; +/***************************** F U N C T I O N S ******************************/ + +static option_help * HDF5_options(void ** init_backend_options, void * init_values){ + HDF5_options_t * o = malloc(sizeof(HDF5_options_t)); + + if (init_values != NULL){ + memcpy(o, init_values, sizeof(HDF5_options_t)); + }else{ + /* initialize the options properly */ + o->collective_md = 0; + } + + *init_backend_options = o; + + option_help h [] = { + {0, "hdf5.collectiveMetadata", "Use collectiveMetadata (available since HDF5-1.10.0)", OPTION_FLAG, 'd', & o->collective_md}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} + + /************************** D E C L A R A T I O N S ***************************/ ior_aiori_t hdf5_aiori = { .name = "HDF5", + .name_legacy = NULL, .create = HDF5_Create, .open = HDF5_Open, .xfer = HDF5_Xfer, @@ -111,6 +140,7 @@ ior_aiori_t hdf5_aiori = { .rmdir = aiori_posix_rmdir, .access = HDF5_Access, .stat = aiori_posix_stat, + .get_options = HDF5_options }; static hid_t xferPropList; /* xfer property list */ @@ -229,7 +259,8 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) "cannot set alignment"); #ifdef HAVE_H5PSET_ALL_COLL_METADATA_OPS - if (param->collective_md) { + HDF5_options_t *o = (HDF5_options_t*) param->backend_options; + if (o->collective_md) { /* more scalable metadata */ HDF5_CHECK(H5Pset_all_coll_metadata_ops(accessPropList, 1), @@ -240,13 +271,15 @@ static void *HDF5_Open(char *testFileName, IOR_param_t * param) #endif /* open file */ - if (param->open == WRITE) { /* WRITE */ - *fd = H5Fcreate(testFileName, fd_mode, - createPropList, accessPropList); - HDF5_CHECK(*fd, "cannot create file"); - } else { /* READ or CHECK */ - *fd = H5Fopen(testFileName, fd_mode, accessPropList); - HDF5_CHECK(*fd, "cannot open file"); + 
if(! param->dryRun){ + if (param->open == WRITE) { /* WRITE */ + *fd = H5Fcreate(testFileName, fd_mode, + createPropList, accessPropList); + HDF5_CHECK(*fd, "cannot create file"); + } else { /* READ or CHECK */ + *fd = H5Fopen(testFileName, fd_mode, accessPropList); + HDF5_CHECK(*fd, "cannot open file"); + } } /* show hints actually attached to file handle */ @@ -394,6 +427,9 @@ static IOR_offset_t HDF5_Xfer(int access, void *fd, IOR_size_t * buffer, } } + if(param->dryRun) + return length; + /* create new data set */ if (startNewDataSet == TRUE) { /* if just opened this file, no data set to close yet */ @@ -439,6 +475,8 @@ static void HDF5_Fsync(void *fd, IOR_param_t * param) */ static void HDF5_Close(void *fd, IOR_param_t * param) { + if(param->dryRun) + return; if (param->fd_fppReadCheck == NULL) { HDF5_CHECK(H5Dclose(dataSet), "cannot close data set"); HDF5_CHECK(H5Sclose(dataSpace), "cannot close data space"); @@ -458,7 +496,10 @@ static void HDF5_Close(void *fd, IOR_param_t * param) */ static void HDF5_Delete(char *testFileName, IOR_param_t * param) { - return(MPIIO_Delete(testFileName, param)); + if(param->dryRun) + return; + MPIIO_Delete(testFileName, param); + return; } /* @@ -590,7 +631,9 @@ static void SetupDataSet(void *fd, IOR_param_t * param) static IOR_offset_t HDF5_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) { - return(MPIIO_GetFileSize(test, testComm, testFileName)); + if(test->dryRun) + return 0; + return(MPIIO_GetFileSize(test, testComm, testFileName)); } /* @@ -598,5 +641,7 @@ HDF5_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) */ static int HDF5_Access(const char *path, int mode, IOR_param_t *param) { - return(MPIIO_Access(path, mode, param)); + if(param->dryRun) + return 0; + return(MPIIO_Access(path, mode, param)); } diff --git a/src/aiori-HDFS.c b/src/aiori-HDFS.c index 8939fe5f..2d4dcb1b 100755 --- a/src/aiori-HDFS.c +++ b/src/aiori-HDFS.c @@ -115,6 +115,7 @@ static IOR_offset_t 
HDFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); ior_aiori_t hdfs_aiori = { .name = "HDFS", + .name_legacy = NULL, .create = HDFS_Create, .open = HDFS_Open, .xfer = HDFS_Xfer, @@ -289,9 +290,9 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * truncate each other's writes */ - if (( param->openFlags & IOR_WRONLY ) && - ( !param->filePerProc ) && - ( rank != 0 )) { + if (( param->openFlags & IOR_WRONLY ) && + ( !param->filePerProc ) && + ( rank != 0 )) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); } @@ -308,7 +309,7 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign param->transferSize, param->hdfs_replicas, param->hdfs_block_size); - } + } hdfs_file = hdfsOpenFile( param->hdfs_fs, testFileName, fd_oflags, @@ -323,12 +324,12 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * For N-1 write, Rank 0 waits for the other ranks to open the file after it has. */ - if (( param->openFlags & IOR_WRONLY ) && - ( !param->filePerProc ) && - ( rank == 0 )) { + if (( param->openFlags & IOR_WRONLY ) && + ( !param->filePerProc ) && + ( rank == 0 )) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - } + } if (param->verbose >= VERBOSE_4) { printf("<- HDFS_Create_Or_Open\n"); @@ -404,7 +405,7 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, } if (param->verbose >= VERBOSE_4) { - printf("\thdfsWrite( 0x%llx, 0x%llx, 0x%llx, %lld)\n", + printf("\thdfsWrite( 0x%llx, 0x%llx, 0x%llx, %lld)\n", hdfs_fs, hdfs_file, ptr, remaining ); /* DEBUGGING */ } rc = hdfsWrite( hdfs_fs, hdfs_file, ptr, remaining ); @@ -426,7 +427,7 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, } if (param->verbose >= VERBOSE_4) { - printf("\thdfsRead( 0x%llx, 0x%llx, 0x%llx, %lld)\n", + printf("\thdfsRead( 0x%llx, 0x%llx, 0x%llx, %lld)\n", hdfs_fs, hdfs_file, ptr, remaining ); /* DEBUGGING */ } rc = hdfsRead( hdfs_fs, hdfs_file, ptr, remaining ); 
diff --git a/src/aiori-IME.c b/src/aiori-IME.c index c8291b3e..500f380f 100755 --- a/src/aiori-IME.c +++ b/src/aiori-IME.c @@ -51,9 +51,42 @@ static int IME_StatFS(const char *, ior_aiori_statfs_t *, static int IME_RmDir(const char *, IOR_param_t *); static int IME_MkDir(const char *, mode_t, IOR_param_t *); static int IME_Stat(const char *, struct stat *, IOR_param_t *); + +#if (IME_NATIVE_API_VERSION >= 132) +static int IME_Mknod(char *); +static void IME_Sync(IOR_param_t *); +#endif + static void IME_Initialize(); static void IME_Finalize(); + +/************************** O P T I O N S *****************************/ +typedef struct{ + int direct_io; +} ime_options_t; + + +option_help * IME_options(void ** init_backend_options, void * init_values){ + ime_options_t * o = malloc(sizeof(ime_options_t)); + + if (init_values != NULL){ + memcpy(o, init_values, sizeof(ime_options_t)); + }else{ + o->direct_io = 0; + } + + *init_backend_options = o; + + option_help h [] = { + {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} + /************************** D E C L A R A T I O N S ***************************/ extern int rank; @@ -63,6 +96,7 @@ extern MPI_Comm testComm; ior_aiori_t ime_aiori = { .name = "IME", + .name_legacy = "IM", .create = IME_Create, .open = IME_Open, .xfer = IME_Xfer, @@ -78,6 +112,12 @@ ior_aiori_t ime_aiori = { .stat = IME_Stat, .initialize = IME_Initialize, .finalize = IME_Finalize, + .get_options = IME_options, +#if (IME_NATIVE_API_VERSION >= 132) + .sync = IME_Sync, + .mknod = IME_Mknod, +#endif + .enable_mdtest = true, }; /***************************** F U N C T I O N S ******************************/ @@ -128,8 +168,10 @@ static void *IME_Open(char *testFileName, IOR_param_t *param) if (fd == NULL) ERR("Unable to malloc file descriptor"); - if (param->useO_DIRECT) - set_o_direct_flag(&fd_oflag); + ime_options_t * o = 
(ime_options_t*) param->backend_options; + if (o->direct_io == TRUE){ + set_o_direct_flag(&fd_oflag); + } if (param->openFlags & IOR_RDONLY) fd_oflag |= O_RDONLY; @@ -268,43 +310,62 @@ static char *IME_GetVersion() return ver; } -/* - * XXX: statfs call is currently not exposed by IME native interface. - */ -static int IME_StatFS(const char *oid, ior_aiori_statfs_t *stat_buf, +static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t *param) { - (void)oid; - (void)stat_buf; (void)param; +#if (IME_NATIVE_API_VERSION >= 130) + struct statvfs statfs_buf; + + int ret = ime_native_statvfs(path, &statfs_buf); + if (ret) + return ret; + + stat_buf->f_bsize = statfs_buf.f_bsize; + stat_buf->f_blocks = statfs_buf.f_blocks; + stat_buf->f_bfree = statfs_buf.f_bfree; + stat_buf->f_files = statfs_buf.f_files; + stat_buf->f_ffree = statfs_buf.f_ffree; + + return 0; +#else + (void)path; + (void)stat_buf; + WARN("statfs is currently not supported in IME backend!"); return -1; +#endif } -/* - * XXX: mkdir call is currently not exposed by IME native interface. - */ -static int IME_MkDir(const char *oid, mode_t mode, IOR_param_t *param) + +static int IME_MkDir(const char *path, mode_t mode, IOR_param_t *param) { - (void)oid; - (void)mode; (void)param; - WARN("mkdir is currently not supported in IME backend!"); +#if (IME_NATIVE_API_VERSION >= 130) + return ime_native_mkdir(path, mode); +#else + (void)path; + (void)mode; + + WARN("mkdir not supported in IME backend!"); return -1; +#endif } -/* - * XXX: rmdir call is curretly not exposed by IME native interface. 
- */ -static int IME_RmDir(const char *oid, IOR_param_t *param) +static int IME_RmDir(const char *path, IOR_param_t *param) { - (void)oid; (void)param; - WARN("rmdir is currently not supported in IME backend!"); +#if (IME_NATIVE_API_VERSION >= 130) + return ime_native_rmdir(path); +#else + (void)path; + + WARN("rmdir not supported in IME backend!"); return -1; +#endif } /* @@ -355,3 +416,27 @@ static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm, return(aggFileSizeFromStat); } + +#if (IME_NATIVE_API_VERSION >= 132) +/* + * Create a file through mknod interface. + */ +static int IME_Mknod(char *testFileName) +{ + int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); + if (ret < 0) + ERR("mknod failed"); + + return ret; +} + +/* + * Use IME sync to flush page cache of all opened files. + */ +static void IME_Sync(IOR_param_t * param) +{ + int ret = ime_native_sync(0); + if (ret != 0) + FAIL("Error executing the sync command."); +} +#endif diff --git a/src/aiori-MMAP.c b/src/aiori-MMAP.c index f812bddd..7be860a6 100644 --- a/src/aiori-MMAP.c +++ b/src/aiori-MMAP.c @@ -32,6 +32,7 @@ static IOR_offset_t MMAP_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); static void MMAP_Close(void *, IOR_param_t *); static void MMAP_Fsync(void *, IOR_param_t *); +static option_help * MMAP_options(void ** init_backend_options, void * init_values); /************************** D E C L A R A T I O N S ***************************/ @@ -45,9 +46,38 @@ ior_aiori_t mmap_aiori = { .get_version = aiori_get_version, .fsync = MMAP_Fsync, .get_file_size = POSIX_GetFileSize, + .get_options = MMAP_options, }; /***************************** F U N C T I O N S ******************************/ +typedef struct{ + int direct_io_ignored; /* this option is ignored */ + void* mmap_ptr; /* for internal usage */ + + int madv_dont_need; + int madv_pattern; +} mmap_options_t; + +static option_help * MMAP_options(void ** init_backend_options, void * init_values){ + 
mmap_options_t * o = malloc(sizeof(mmap_options_t)); + + if (init_values != NULL){ + memcpy(o, init_values, sizeof(mmap_options_t)); + }else{ + memset(o, 0, sizeof(mmap_options_t)); + } + + *init_backend_options = o; + + option_help h [] = { + {0, "mmap.madv_dont_need", "Use advise don't need", OPTION_FLAG, 'd', & o->madv_dont_need}, + {0, "mmap.madv_pattern", "Use advise to indicate the pattern random/sequential", OPTION_FLAG, 'd', & o->madv_pattern}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} static void ior_mmap_file(int *file, IOR_param_t *param) { @@ -56,21 +86,27 @@ static void ior_mmap_file(int *file, IOR_param_t *param) if (param->open == WRITE) flags |= PROT_WRITE; + mmap_options_t *o = (mmap_options_t*) param->backend_options; - param->mmap_ptr = mmap(NULL, size, flags, MAP_SHARED, + o->mmap_ptr = mmap(NULL, size, flags, MAP_SHARED, *file, 0); - if (param->mmap_ptr == MAP_FAILED) + if (o->mmap_ptr == MAP_FAILED) ERR("mmap() failed"); if (param->randomOffset) flags = POSIX_MADV_RANDOM; else flags = POSIX_MADV_SEQUENTIAL; - if (posix_madvise(param->mmap_ptr, size, flags) != 0) - ERR("madvise() failed"); + + if(o->madv_pattern){ + if (posix_madvise(o->mmap_ptr, size, flags) != 0) + ERR("madvise() failed"); + } - if (posix_madvise(param->mmap_ptr, size, POSIX_MADV_DONTNEED) != 0) - ERR("madvise() failed"); + if (o->madv_dont_need){ + if (posix_madvise(o->mmap_ptr, size, POSIX_MADV_DONTNEED) != 0) + ERR("madvise() failed"); + } return; } @@ -107,16 +143,17 @@ static void *MMAP_Open(char *testFileName, IOR_param_t * param) static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer, IOR_offset_t length, IOR_param_t * param) { + mmap_options_t *o = (mmap_options_t*) param->backend_options; if (access == WRITE) { - memcpy(param->mmap_ptr + param->offset, buffer, length); + memcpy(o->mmap_ptr + param->offset, buffer, length); } else { - memcpy(buffer, param->mmap_ptr + param->offset, 
length); + memcpy(buffer, o->mmap_ptr + param->offset, length); } if (param->fsyncPerWrite == TRUE) { - if (msync(param->mmap_ptr + param->offset, length, MS_SYNC) != 0) + if (msync(o->mmap_ptr + param->offset, length, MS_SYNC) != 0) ERR("msync() failed"); - if (posix_madvise(param->mmap_ptr + param->offset, length, + if (posix_madvise(o->mmap_ptr + param->offset, length, POSIX_MADV_DONTNEED) != 0) ERR("madvise() failed"); } @@ -128,7 +165,8 @@ static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer, */ static void MMAP_Fsync(void *fd, IOR_param_t * param) { - if (msync(param->mmap_ptr, param->expectedAggFileSize, MS_SYNC) != 0) + mmap_options_t *o = (mmap_options_t*) param->backend_options; + if (msync(o->mmap_ptr, param->expectedAggFileSize, MS_SYNC) != 0) EWARN("msync() failed"); } @@ -137,8 +175,9 @@ static void MMAP_Fsync(void *fd, IOR_param_t * param) */ static void MMAP_Close(void *fd, IOR_param_t * param) { - if (munmap(param->mmap_ptr, param->expectedAggFileSize) != 0) + mmap_options_t *o = (mmap_options_t*) param->backend_options; + if (munmap(o->mmap_ptr, param->expectedAggFileSize) != 0) ERR("munmap failed"); - param->mmap_ptr = NULL; + o->mmap_ptr = NULL; POSIX_Close(fd, param); } diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 2ffdc141..04c10bee 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -46,6 +46,7 @@ static void MPIIO_Fsync(void *, IOR_param_t *); ior_aiori_t mpiio_aiori = { .name = "MPIIO", + .name_legacy = NULL, .create = MPIIO_Create, .open = MPIIO_Open, .xfer = MPIIO_Xfer, @@ -68,8 +69,12 @@ ior_aiori_t mpiio_aiori = { */ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) { + if(param->dryRun){ + return MPI_SUCCESS; + } MPI_File fd; int mpi_mode = MPI_MODE_UNIQUE_OPEN; + MPI_Info mpiHints = MPI_INFO_NULL; if ((mode & W_OK) && (mode & R_OK)) mpi_mode |= MPI_MODE_RDWR; @@ -78,12 +83,15 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) else mpi_mode |= MPI_MODE_RDONLY; - int 
ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, - MPI_INFO_NULL, &fd); + SetHints(&mpiHints, param->hintsFileName); + + int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, mpiHints, &fd); if (!ret) MPI_File_close(&fd); + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); return ret; } @@ -92,7 +100,10 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) */ static void *MPIIO_Create(char *testFileName, IOR_param_t * param) { - return MPIIO_Open(testFileName, param); + if(param->dryRun){ + return 0; + } + return MPIIO_Open(testFileName, param); } /* @@ -170,11 +181,13 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) ShowHints(&mpiHints); fprintf(stdout, "}\n"); } - MPI_CHECK(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd), - "cannot open file"); + if(! param->dryRun){ + MPI_CHECKF(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd), + "cannot open file: %s", testFileName); + } /* show hints actually attached to file handle */ - if (rank == 0 && param->showHints) { + if (rank == 0 && param->showHints && ! param->dryRun) { if (mpiHints != MPI_INFO_NULL) MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); MPI_CHECK(MPI_File_get_info(*fd, &mpiHints), @@ -185,7 +198,7 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) } /* preallocate space for file */ - if (param->preallocate && param->open == WRITE) { + if (param->preallocate && param->open == WRITE && ! param->dryRun) { MPI_CHECK(MPI_File_preallocate(*fd, (MPI_Offset) (param->segmentCount * @@ -231,11 +244,13 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) MPI_CHECK(MPI_Type_commit(¶m->fileType), "cannot commit datatype"); - MPI_CHECK(MPI_File_set_view(*fd, (MPI_Offset) 0, + if(! 
param->dryRun){ + MPI_CHECK(MPI_File_set_view(*fd, (MPI_Offset) 0, param->transferType, param->fileType, "native", (MPI_Info) MPI_INFO_NULL), "cannot set file view"); + } } if (mpiHints != MPI_INFO_NULL) MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); @@ -253,6 +268,9 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, will get "assignment from incompatible pointer-type" warnings, if we only use this one set of signatures. */ + if(param->dryRun) + return length; + int (MPIAPI * Access) (MPI_File, void *, int, MPI_Datatype, MPI_Status *); int (MPIAPI * Access_at) (MPI_File, MPI_Offset, void *, int, @@ -381,8 +399,10 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, */ static void MPIIO_Fsync(void *fdp, IOR_param_t * param) { - if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS) - EWARN("fsync() failed"); + if(param->dryRun) + return; + if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS) + EWARN("fsync() failed"); } /* @@ -390,7 +410,9 @@ static void MPIIO_Fsync(void *fdp, IOR_param_t * param) */ static void MPIIO_Close(void *fd, IOR_param_t * param) { - MPI_CHECK(MPI_File_close((MPI_File *) fd), "cannot close file"); + if(! 
param->dryRun){ + MPI_CHECK(MPI_File_close((MPI_File *) fd), "cannot close file"); + } if ((param->useFileView == TRUE) && (param->fd_fppReadCheck == NULL)) { /* * need to free the datatype, so done in the close process @@ -408,8 +430,10 @@ static void MPIIO_Close(void *fd, IOR_param_t * param) */ void MPIIO_Delete(char *testFileName, IOR_param_t * param) { - MPI_CHECK(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL), - "cannot delete file"); + if(param->dryRun) + return; + MPI_CHECKF(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL), + "cannot delete file: %s", testFileName); } /* @@ -472,9 +496,12 @@ static IOR_offset_t SeekOffset(MPI_File fd, IOR_offset_t offset, IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) { + if(test->dryRun) + return 0; IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; MPI_File fd; MPI_Comm comm; + MPI_Info mpiHints = MPI_INFO_NULL; if (test->filePerProc == TRUE) { comm = MPI_COMM_SELF; @@ -482,12 +509,15 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, comm = testComm; } + SetHints(&mpiHints, test->hintsFileName); MPI_CHECK(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY, - MPI_INFO_NULL, &fd), + mpiHints, &fd), "cannot open file to get file size"); MPI_CHECK(MPI_File_get_size(fd, (MPI_Offset *) & aggFileSizeFromStat), "cannot get file size"); MPI_CHECK(MPI_File_close(&fd), "cannot close file"); + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); if (test->filePerProc == TRUE) { MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, diff --git a/src/aiori-NCMPI.c b/src/aiori-NCMPI.c index b096c508..5fc13751 100755 --- a/src/aiori-NCMPI.c +++ b/src/aiori-NCMPI.c @@ -62,6 +62,7 @@ static int NCMPI_Access(const char *, int, IOR_param_t *); ior_aiori_t ncmpi_aiori = { .name = "NCMPI", + .name_legacy = NULL, .create = NCMPI_Create, .open = NCMPI_Open, .xfer = NCMPI_Xfer, @@ -88,7 +89,7 @@ static void 
*NCMPI_Create(char *testFileName, IOR_param_t * param) int fd_mode; MPI_Info mpiHints = MPI_INFO_NULL; - /* Wei-keng Liao: read and set MPI file hints from hintsFile */ + /* read and set MPI file hints from hintsFile */ SetHints(&mpiHints, param->hintsFileName); if (rank == 0 && param->showHints) { fprintf(stdout, "\nhints passed to MPI_File_open() {\n"); @@ -104,22 +105,23 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) NCMPI_CHECK(ncmpi_create(testComm, testFileName, fd_mode, mpiHints, fd), "cannot create file"); - /* Wei-keng Liao: print the MPI file hints currently used */ -/* WEL - add when ncmpi_get_file_info() is in current parallel-netcdf release - if (rank == 0 && param->showHints) { - MPI_CHECK(ncmpi_get_file_info(*fd, &mpiHints), - "cannot get file info"); - fprintf(stdout, "\nhints returned from opened file {\n"); - ShowHints(&mpiHints); - fprintf(stdout, "}\n"); - } -*/ - - /* Wei-keng Liao: free up the mpiHints object */ -/* WEL - this needs future fix from next release of PnetCDF - if (mpiHints != MPI_INFO_NULL) - MPI_CHECK(MPI_Info_free(&mpiHints), "cannot free file info"); -*/ + /* free up the mpiHints object */ + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "cannot free file info"); + +#if defined(PNETCDF_VERSION_MAJOR) && (PNETCDF_VERSION_MAJOR > 1 || PNETCDF_VERSION_MINOR >= 2) + /* ncmpi_get_file_info is first available in 1.2.0 */ + if (rank == 0 && param->showHints) { + MPI_Info info_used; + MPI_CHECK(ncmpi_get_file_info(*fd, &info_used), + "cannot inquire file info"); + /* print the MPI file hints currently used */ + fprintf(stdout, "\nhints returned from opened file {\n"); + ShowHints(&info_used); + fprintf(stdout, "}\n"); + MPI_CHECK(MPI_Info_free(&info_used), "cannot free file info"); + } +#endif return (fd); } @@ -133,7 +135,7 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) int fd_mode; MPI_Info mpiHints = MPI_INFO_NULL; - /* Wei-keng Liao: read and set MPI file hints from 
hintsFile */ + /* read and set MPI file hints from hintsFile */ SetHints(&mpiHints, param->hintsFileName); if (rank == 0 && param->showHints) { fprintf(stdout, "\nhints passed to MPI_File_open() {\n"); @@ -149,22 +151,23 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) NCMPI_CHECK(ncmpi_open(testComm, testFileName, fd_mode, mpiHints, fd), "cannot open file"); - /* Wei-keng Liao: print the MPI file hints currently used */ -/* WEL - add when ncmpi_get_file_info() is in current parallel-netcdf release - if (rank == 0 && param->showHints) { - MPI_CHECK(ncmpi_get_file_info(*fd, &mpiHints), - "cannot get file info"); - fprintf(stdout, "\nhints returned from opened file {\n"); - ShowHints(&mpiHints); - fprintf(stdout, "}\n"); - } -*/ - - /* Wei-keng Liao: free up the mpiHints object */ -/* WEL - this needs future fix from next release of PnetCDF - if (mpiHints != MPI_INFO_NULL) - MPI_CHECK(MPI_Info_free(&mpiHints), "cannot free file info"); -*/ + /* free up the mpiHints object */ + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "cannot free file info"); + +#if defined(PNETCDF_VERSION_MAJOR) && (PNETCDF_VERSION_MAJOR > 1 || PNETCDF_VERSION_MINOR >= 2) + /* ncmpi_get_file_info is first available in 1.2.0 */ + if (rank == 0 && param->showHints) { + MPI_Info info_used; + MPI_CHECK(ncmpi_get_file_info(*fd, &info_used), + "cannot inquire file info"); + /* print the MPI file hints currently used */ + fprintf(stdout, "\nhints returned from opened file {\n"); + ShowHints(&info_used); + fprintf(stdout, "}\n"); + MPI_CHECK(MPI_Info_free(&info_used), "cannot free file info"); + } +#endif return (fd); } @@ -182,17 +185,6 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, IOR_offset_t segmentPosition; int segmentNum, transferNum; - /* Wei-keng Liao: In ior.c line 1979 says "block size must be a multiple - of transfer size." Hence, length should always == param->transferSize - below. I leave it here to double check. 
- */ - if (length != param->transferSize) { - char errMsg[256]; - sprintf(errMsg, "length(%lld) != param->transferSize(%lld)\n", - length, param->transferSize); - NCMPI_CHECK(-1, errMsg); - } - /* determine by offset if need to start data set */ if (param->filePerProc == TRUE) { segmentPosition = (IOR_offset_t) 0; @@ -223,8 +215,8 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, int numTransfers = param->blockSize / param->transferSize; - /* Wei-keng Liao: change 1D array to 3D array of dimensions: - [segmentCount*numTasksWorld][numTransfers][transferSize] + /* reshape 1D array to 3D array: + [segmentCount*numTasks][numTransfers][transferSize] Requirement: none of these dimensions should be > 4G, */ NCMPI_CHECK(ncmpi_def_dim @@ -263,19 +255,19 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, var_id = param->var_id; - /* Wei-keng Liao: calculate the segment number */ + /* calculate the segment number */ segmentNum = param->offset / (param->numTasks * param->blockSize); - /* Wei-keng Liao: calculate the transfer number in each block */ + /* calculate the transfer number in each block */ transferNum = param->offset % param->blockSize / param->transferSize; - /* Wei-keng Liao: read/write the 3rd dim of the dataset, each is of + /* read/write the 3rd dim of the dataset, each is of amount param->transferSize */ bufSize[0] = 1; bufSize[1] = 1; bufSize[2] = param->transferSize; - offset[0] = segmentNum * numTasksWorld + rank; + offset[0] = segmentNum * param->numTasks + rank; offset[1] = transferNum; offset[2] = 0; @@ -380,7 +372,7 @@ static int GetFileMode(IOR_param_t * param) fprintf(stdout, "O_DIRECT not implemented in NCMPI\n"); } - /* Wei-keng Liao: to enable > 4GB file size */ + /* to enable > 4GB file size */ fd_mode |= NC_64BIT_OFFSET; return (fd_mode); diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 4a5a7f31..463a9c81 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -71,12 +71,44 @@ static 
IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); static void POSIX_Fsync(void *, IOR_param_t *); +static void POSIX_Sync(IOR_param_t * ); + +/************************** O P T I O N S *****************************/ +typedef struct{ + /* in case of a change, please update depending MMAP module too */ + int direct_io; +} posix_options_t; + + +option_help * POSIX_options(void ** init_backend_options, void * init_values){ + posix_options_t * o = malloc(sizeof(posix_options_t)); + + if (init_values != NULL){ + memcpy(o, init_values, sizeof(posix_options_t)); + }else{ + o->direct_io = 0; + } + + *init_backend_options = o; + + option_help h [] = { + {0, "posix.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} + /************************** D E C L A R A T I O N S ***************************/ + ior_aiori_t posix_aiori = { .name = "POSIX", + .name_legacy = NULL, .create = POSIX_Create, + .mknod = POSIX_Mknod, .open = POSIX_Open, .xfer = POSIX_Xfer, .close = POSIX_Close, @@ -89,6 +121,9 @@ ior_aiori_t posix_aiori = { .rmdir = aiori_posix_rmdir, .access = aiori_posix_access, .stat = aiori_posix_stat, + .get_options = POSIX_options, + .enable_mdtest = true, + .sync = POSIX_Sync }; /***************************** F U N C T I O N S ******************************/ @@ -113,7 +148,7 @@ void gpfs_free_all_locks(int fd) rc = gpfs_fcntl(fd, &release_all); if (verbose >= VERBOSE_0 && rc != 0) { - EWARN("gpfs_fcntl release all locks hint failed."); + EWARNF("gpfs_fcntl(%d, ...) 
release all locks hint failed.", fd); } } void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int access) @@ -136,7 +171,7 @@ void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int acce rc = gpfs_fcntl(fd, &take_locks); if (verbose >= VERBOSE_2 && rc != 0) { - EWARN("gpfs_fcntl access range hint failed."); + EWARNF("gpfs_fcntl(%d, ...) access range hint failed.", fd); } } @@ -160,7 +195,7 @@ void gpfs_access_end(int fd, IOR_offset_t length, IOR_param_t *param, int access rc = gpfs_fcntl(fd, &free_locks); if (verbose >= VERBOSE_2 && rc != 0) { - EWARN("gpfs_fcntl free range hint failed."); + EWARNF("gpfs_fcntl(%d, ...) free range hint failed.", fd); } } @@ -199,6 +234,24 @@ bool beegfs_isOptionSet(int opt) { return opt != -1; } +bool beegfs_compatibleFileExists(char* filepath, int numTargets, int chunkSize) +{ + int fd = open(filepath, O_RDWR); + + if (fd == -1) + return false; + + unsigned read_stripePattern = 0; + u_int16_t read_numTargets = 0; + int read_chunkSize = 0; + + bool retVal = beegfs_getStripeInfo(fd, &read_stripePattern, &read_chunkSize, &read_numTargets); + + close(fd); + + return retVal && read_numTargets == numTargets && read_chunkSize == chunkSize; +} + /* * Create a file on a BeeGFS file system with striping parameters */ @@ -209,14 +262,14 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun char* dir = dirname(dirTmp); DIR* parentDirS = opendir(dir); if (!parentDirS) { - ERR("Failed to get directory"); + ERRF("Failed to get directory: %s", dir); } else { int parentDirFd = dirfd(parentDirS); if (parentDirFd < 0) { - ERR("Failed to get directory descriptor"); + ERRF("Failed to get directory descriptor: %s", dir); } else { @@ -245,8 +298,9 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun char* filenameTmp = strdup(filepath); char* filename = basename(filepath); - bool isFileCreated = beegfs_createFile(parentDirFd, filename, - mode, numTargets, 
chunkSize); + bool isFileCreated = beegfs_compatibleFileExists(filepath, numTargets, chunkSize) + || beegfs_createFile(parentDirFd, filename, + mode, numTargets, chunkSize); if (!isFileCreated) ERR("Could not create file"); retVal = true; @@ -267,14 +321,19 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun void *POSIX_Create(char *testFileName, IOR_param_t * param) { int fd_oflag = O_BINARY; + int mode = 0664; int *fd; fd = (int *)malloc(sizeof(int)); if (fd == NULL) ERR("Unable to malloc file descriptor"); + posix_options_t * o = (posix_options_t*) param->backend_options; + if (o->direct_io == TRUE){ + set_o_direct_flag(&fd_oflag); + } - if (param->useO_DIRECT == TRUE) - set_o_direct_flag(&fd_oflag); + if(param->dryRun) + return 0; #ifdef HAVE_LUSTRE_LUSTRE_USER_H /* Add a #define for FASYNC if not available, as it forms part of @@ -290,9 +349,10 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) if (!param->filePerProc && rank != 0) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); fd_oflag |= O_RDWR; - *fd = open64(testFileName, fd_oflag, 0664); + *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) - ERR("open64() failed"); + ERRF("open64(\"%s\", %d, %#o) failed", + testFileName, fd_oflag, mode); } else { struct lov_user_md opts = { 0 }; @@ -307,7 +367,7 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; - *fd = open64(testFileName, fd_oflag, 0664); + *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) { fprintf(stdout, "\nUnable to open '%s': %s\n", testFileName, strerror(errno)); @@ -336,7 +396,7 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) if (beegfs_isOptionSet(param->beegfs_chunkSize) || beegfs_isOptionSet(param->beegfs_numTargets)) { bool result = beegfs_createFilePath(testFileName, - 0664, + mode, param->beegfs_numTargets, param->beegfs_chunkSize); if (result) { @@ -347,9 +407,10 @@ void *POSIX_Create(char 
*testFileName, IOR_param_t * param) } #endif /* HAVE_BEEGFS_BEEGFS_H */ - *fd = open64(testFileName, fd_oflag, 0664); + *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) - ERR("open64() failed"); + ERRF("open64(\"%s\", %d, %#o) failed", + testFileName, fd_oflag, mode); #ifdef HAVE_LUSTRE_LUSTRE_USER_H } @@ -357,7 +418,7 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) if (param->lustre_ignore_locks) { int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) - ERR("ioctl(LL_IOC_SETFLAGS) failed"); + ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); } #endif /* HAVE_LUSTRE_LUSTRE_USER_H */ @@ -372,6 +433,20 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) return ((void *)fd); } +/* + * Creat a file through mknod interface. + */ +int POSIX_Mknod(char *testFileName) +{ + int ret; + + ret = mknod(testFileName, S_IFREG | S_IRUSR, 0); + if (ret < 0) + ERR("mknod failed"); + + return ret; +} + /* * Open a file through the POSIX interface. */ @@ -384,13 +459,18 @@ void *POSIX_Open(char *testFileName, IOR_param_t * param) if (fd == NULL) ERR("Unable to malloc file descriptor"); - if (param->useO_DIRECT == TRUE) + posix_options_t * o = (posix_options_t*) param->backend_options; + if (o->direct_io == TRUE) set_o_direct_flag(&fd_oflag); fd_oflag |= O_RDWR; + + if(param->dryRun) + return 0; + *fd = open64(testFileName, fd_oflag); if (*fd < 0) - ERR("open64 failed"); + ERRF("open64(\"%s\", %d) failed", testFileName, fd_oflag); #ifdef HAVE_LUSTRE_LUSTRE_USER_H if (param->lustre_ignore_locks) { @@ -400,7 +480,7 @@ void *POSIX_Open(char *testFileName, IOR_param_t * param) "** Disabling lustre range locking **\n"); } if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) - ERR("ioctl(LL_IOC_SETFLAGS) failed"); + ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) 
failed", *fd); } #endif /* HAVE_LUSTRE_LUSTRE_USER_H */ @@ -424,6 +504,9 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, long long rc; int fd; + if(param->dryRun) + return length; + fd = *(int *)file; #ifdef HAVE_GPFS_FCNTL_H @@ -435,7 +518,7 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, /* seek to offset */ if (lseek64(fd, param->offset, SEEK_SET) == -1) - ERR("lseek64() failed"); + ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, param->offset); while (remaining > 0) { /* write/read file */ @@ -448,7 +531,8 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, } rc = write(fd, ptr, remaining); if (rc == -1) - ERR("write() failed"); + ERRF("write(%d, %p, %lld) failed", + fd, (void*)ptr, remaining); if (param->fsyncPerWrite == TRUE) POSIX_Fsync(&fd, param); } else { /* READ or CHECK */ @@ -460,9 +544,11 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, } rc = read(fd, ptr, remaining); if (rc == 0) - ERR("read() returned EOF prematurely"); + ERRF("read(%d, %p, %lld) returned EOF prematurely", + fd, (void*)ptr, remaining); if (rc == -1) - ERR("read() failed"); + ERRF("read(%d, %p, %lld) failed", + fd, (void*)ptr, remaining); } if (rc < remaining) { fprintf(stdout, @@ -497,16 +583,28 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, static void POSIX_Fsync(void *fd, IOR_param_t * param) { if (fsync(*(int *)fd) != 0) - EWARN("fsync() failed"); + EWARNF("fsync(%d) failed", *(int *)fd); } + +static void POSIX_Sync(IOR_param_t * param) +{ + int ret = system("sync"); + if (ret != 0){ + FAIL("Error executing the sync command, ensure it exists."); + } +} + + /* * Close a file through the POSIX interface. 
*/ void POSIX_Close(void *fd, IOR_param_t * param) { + if(param->dryRun) + return; if (close(*(int *)fd) != 0) - ERR("close() failed"); + ERRF("close(%d) failed", *(int *)fd); free(fd); } @@ -515,11 +613,12 @@ void POSIX_Close(void *fd, IOR_param_t * param) */ void POSIX_Delete(char *testFileName, IOR_param_t * param) { - char errmsg[256]; - sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n", - rank, testFileName); - if (unlink(testFileName) != 0) - EWARN(errmsg); + if(param->dryRun) + return; + if (unlink(testFileName) != 0){ + EWARNF("[RANK %03d]: unlink() of file \"%s\" failed\n", + rank, testFileName); + } } /* @@ -528,11 +627,13 @@ void POSIX_Delete(char *testFileName, IOR_param_t * param) IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) { + if(test->dryRun) + return 0; struct stat stat_buf; IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; if (stat(testFileName, &stat_buf) != 0) { - ERR("stat() failed"); + ERRF("stat(\"%s\", ...) 
failed", testFileName); } aggFileSizeFromStat = stat_buf.st_size; diff --git a/src/aiori-RADOS.c b/src/aiori-RADOS.c index ad3ad474..b8789d4b 100755 --- a/src/aiori-RADOS.c +++ b/src/aiori-RADOS.c @@ -67,6 +67,7 @@ static option_help * RADOS_options(); /************************** D E C L A R A T I O N S ***************************/ ior_aiori_t rados_aiori = { .name = "RADOS", + .name_legacy = NULL, .create = RADOS_Create, .open = RADOS_Open, .xfer = RADOS_Xfer, @@ -136,9 +137,6 @@ static void *RADOS_Create_Or_Open(char *testFileName, IOR_param_t * param, int c RADOS_Cluster_Init(param); - if (param->useO_DIRECT == TRUE) - WARN("direct I/O mode is not implemented in RADOS\n"); - oid = strdup(testFileName); if (!oid) ERR("unable to allocate RADOS oid"); diff --git a/src/aiori-S3.c b/src/aiori-S3.c index 326b5bca..2c9a9afd 100755 --- a/src/aiori-S3.c +++ b/src/aiori-S3.c @@ -159,6 +159,8 @@ static void S3_Fsync(void*, IOR_param_t*); static IOR_offset_t S3_GetFileSize(IOR_param_t*, MPI_Comm, char*); static void S3_init(); static void S3_finalize(); +static int S3_check_params(IOR_param_t *); + /************************** D E C L A R A T I O N S ***************************/ @@ -167,6 +169,7 @@ static void S3_finalize(); // N:N fails if "transfer-size" != "block-size" (because that requires "append") ior_aiori_t s3_aiori = { .name = "S3", + .name_legacy = NULL, .create = S3_Create, .open = S3_Open, .xfer = S3_Xfer, @@ -176,7 +179,8 @@ ior_aiori_t s3_aiori = { .fsync = S3_Fsync, .get_file_size = S3_GetFileSize, .initialize = S3_init, - .finalize = S3_finalize + .finalize = S3_finalize, + .check_params = S3_check_params }; // "S3", plus EMC-extensions enabled @@ -227,6 +231,22 @@ static void S3_finalize(){ aws_cleanup(); } +static int S3_check_params(IOR_param_t * test){ + /* N:1 and N:N */ + IOR_offset_t NtoN = test->filePerProc; + IOR_offset_t Nto1 = ! 
NtoN; + IOR_offset_t s = test->segmentCount; + IOR_offset_t t = test->transferSize; + IOR_offset_t b = test->blockSize; + + if (Nto1 && (s != 1) && (b != t)) { + ERR("N:1 (strided) requires xfer-size == block-size"); + return 0; + } + + return 1; +} + /* modelled on similar macros in iordef.h */ #define CURL_ERR(MSG, CURL_ERRNO, PARAM) \ do { \ diff --git a/src/aiori.c b/src/aiori.c index a2e73303..71f99d1c 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -12,6 +12,17 @@ * \******************************************************************************/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +#if defined(HAVE_STRINGS_H) +#include +#endif + #include "aiori.h" #if defined(HAVE_SYS_STATVFS_H) @@ -30,6 +41,10 @@ ior_aiori_t *available_aiori[] = { #ifdef USE_POSIX_AIORI &posix_aiori, +#endif +#ifdef USE_DAOS_AIORI + &daos_aiori, + &dfs_aiori, #endif & dummy_aiori, #ifdef USE_HDF5_AIORI @@ -57,39 +72,75 @@ ior_aiori_t *available_aiori[] = { #endif #ifdef USE_RADOS_AIORI &rados_aiori, +#endif +#ifdef USE_CEPHFS_AIORI + &cephfs_aiori, +#endif +#ifdef USE_GFARM_AIORI + &gfarm_aiori, #endif NULL }; -void airoi_parse_options(int argc, char ** argv, option_help * global_options){ - int airoi_c = aiori_count(); - options_all opt; - opt.module_count = airoi_c + 1; - opt.modules = malloc(sizeof(option_module) * (airoi_c + 1)); - opt.modules[0].prefix = NULL; - opt.modules[0].options = global_options; - ior_aiori_t **tmp = available_aiori; - for (int i=1; *tmp != NULL; ++tmp, i++) { - opt.modules[i].prefix = (*tmp)->name; - if((*tmp)->get_options != NULL){ - opt.modules[i].options = (*tmp)->get_options(); - }else{ - opt.modules[i].options = NULL; - } +void * airoi_update_module_options(const ior_aiori_t * backend, options_all_t * opt){ + if (backend->get_options == NULL) + return NULL; + char * name = backend->name; + ior_aiori_t **tmp = available_aiori; + for (int i=1; *tmp != NULL; ++tmp, i++) { + if (strcmp(opt->modules[i].prefix, name) == 0){ + 
opt->modules[i].options = (*tmp)->get_options(& opt->modules[i].defaults, opt->modules[i].defaults); + return opt->modules[i].defaults; } - option_parse(argc, argv, &opt); - free(opt.modules); + } + return NULL; } -void aiori_supported_apis(char * APIs){ +options_all_t * airoi_create_all_module_options(option_help * global_options){ + int airoi_c = aiori_count(); + options_all_t * opt = malloc(sizeof(options_all_t)); + opt->module_count = airoi_c + 1; + opt->modules = malloc(sizeof(option_module) * (airoi_c + 1)); + opt->modules[0].prefix = NULL; + opt->modules[0].options = global_options; ior_aiori_t **tmp = available_aiori; - if(*tmp != NULL){ - APIs += sprintf(APIs, "%s", (*tmp)->name); - tmp++; - for (; *tmp != NULL; ++tmp) { - APIs += sprintf(APIs, "|%s", (*tmp)->name); + for (int i=1; *tmp != NULL; ++tmp, i++) { + opt->modules[i].prefix = (*tmp)->name; + if((*tmp)->get_options != NULL){ + opt->modules[i].options = (*tmp)->get_options(& opt->modules[i].defaults, NULL); + }else{ + opt->modules[i].options = NULL; } } + return opt; +} + +void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) +{ + ior_aiori_t **tmp = available_aiori; + char delimiter = ' '; + + while (*tmp != NULL) + { + if ((type == MDTEST) && !(*tmp)->enable_mdtest) + { + tmp++; + continue; + } + + if (delimiter == ' ') + { + APIs += sprintf(APIs, "%s", (*tmp)->name); + delimiter = '|'; + } + else + APIs += sprintf(APIs, "%c%s", delimiter, (*tmp)->name); + + if ((*tmp)->name_legacy != NULL) + APIs_legacy += sprintf(APIs_legacy, "%c%s", + delimiter, (*tmp)->name_legacy); + tmp++; + } } /** @@ -151,73 +202,141 @@ char* aiori_get_version() return ""; } -static int is_initialized = FALSE; +static bool is_initialized = false; -void aiori_initialize(){ - if (is_initialized) return; - is_initialized = TRUE; +static void init_or_fini_internal(const ior_aiori_t *test_backend, + const bool init) +{ + if (init) + { + if (test_backend->initialize) + test_backend->initialize(); + } 
+ else + { + if (test_backend->finalize) + test_backend->finalize(); + } +} - /* Sanity check, we were compiled with SOME backend, right? */ - if (0 == aiori_count ()) { - ERR("No IO backends compiled into aiori. " - "Run 'configure --with-', and recompile."); - } +static void init_or_fini(IOR_test_t *tests, const bool init) +{ + /* Sanity check, we were compiled with SOME backend, right? */ + if (0 == aiori_count ()) { + ERR("No IO backends compiled into aiori. " + "Run 'configure --with-', and recompile."); + } - for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) { - if((*tmp)->initialize){ - (*tmp)->initialize(); - } - } + /* Pointer to the initialize of finalize function */ + + + /* if tests is NULL, initialize or finalize all available backends */ + if (tests == NULL) + { + for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) + init_or_fini_internal(*tmp, init); + + return; + } + + for (IOR_test_t *t = tests; t != NULL; t = t->next) + { + IOR_param_t *params = &t->params; + assert(params != NULL); + + const ior_aiori_t *test_backend = params->backend; + assert(test_backend != NULL); + + init_or_fini_internal(test_backend, init); + } } -void aiori_finalize(){ - if (! is_initialized) return; - is_initialized = FALSE; - for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) { - if((*tmp)->finalize){ - (*tmp)->finalize(); - } - } +/** + * Initialize IO backends. + * + * @param[in] tests Pointers to the first test + * + * This function initializes all backends which will be used. If tests is NULL + * all available backends are initialized. + */ +void aiori_initialize(IOR_test_t *tests) +{ + if (is_initialized) + return; + + init_or_fini(tests, true); + + is_initialized = true; +} + +/** + * Finalize IO backends. + * + * @param[in] tests Pointers to the first test + * + * This function finalizes all backends which were used. If tests is NULL + * all available backends are finialized. 
+ */ +void aiori_finalize(IOR_test_t *tests) +{ + if (!is_initialized) + return; + + is_initialized = false; + + init_or_fini(tests, false); } const ior_aiori_t *aiori_select (const char *api) { char warn_str[256] = {0}; for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) { - if (NULL == api || strcasecmp(api, (*tmp)->name) == 0) { - if (NULL == (*tmp)->statfs) { - (*tmp)->statfs = aiori_posix_statfs; - snprintf(warn_str, 256, "assuming POSIX-based backend for" - " %s statfs call", api); - WARN(warn_str); - } - if (NULL == (*tmp)->mkdir) { - (*tmp)->mkdir = aiori_posix_mkdir; - snprintf(warn_str, 256, "assuming POSIX-based backend for" - " %s mkdir call", api); - WARN(warn_str); - } - if (NULL == (*tmp)->rmdir) { - (*tmp)->rmdir = aiori_posix_rmdir; - snprintf(warn_str, 256, "assuming POSIX-based backend for" - " %s rmdir call", api); - WARN(warn_str); - } - if (NULL == (*tmp)->access) { - (*tmp)->access = aiori_posix_access; - snprintf(warn_str, 256, "assuming POSIX-based backend for" - " %s access call", api); - WARN(warn_str); - } - if (NULL == (*tmp)->stat) { - (*tmp)->stat = aiori_posix_stat; - snprintf(warn_str, 256, "assuming POSIX-based backend for" - " %s stat call", api); - WARN(warn_str); - } - return *tmp; + char *name_leg = (*tmp)->name_legacy; + if (NULL != api && + (strcasecmp(api, (*tmp)->name) != 0) && + (name_leg == NULL || strcasecmp(api, name_leg) != 0)) + continue; + + if (name_leg != NULL && strcasecmp(api, name_leg) == 0) + { + snprintf(warn_str, 256, "%s backend is deprecated use %s" + " instead", api, (*tmp)->name); + WARN(warn_str); } + + if (NULL == (*tmp)->statfs) { + (*tmp)->statfs = aiori_posix_statfs; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s statfs call", api); + WARN(warn_str); + } + if (NULL == (*tmp)->mkdir) { + (*tmp)->mkdir = aiori_posix_mkdir; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s mkdir call", api); + WARN(warn_str); + } + if (NULL == (*tmp)->rmdir) { + 
(*tmp)->rmdir = aiori_posix_rmdir; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s rmdir call", api); + WARN(warn_str); + } + if (NULL == (*tmp)->access) { + (*tmp)->access = aiori_posix_access; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s access call", api); + WARN(warn_str); + } + if (NULL == (*tmp)->stat) { + (*tmp)->stat = aiori_posix_stat; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s stat call", api); + WARN(warn_str); + } + + return *tmp; } return NULL; diff --git a/src/aiori.h b/src/aiori.h index d6c1d0ef..4d416c21 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -22,6 +22,7 @@ #endif /* not MPI_FILE_NULL */ #include +#include #include "ior.h" #include "iordef.h" /* IOR Definitions */ @@ -65,13 +66,15 @@ typedef struct ior_aiori_statfs { typedef struct ior_aiori { char *name; + char *name_legacy; void *(*create)(char *, IOR_param_t *); + int (*mknod)(char *); void *(*open)(char *, IOR_param_t *); IOR_offset_t (*xfer)(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); void (*close)(void *, IOR_param_t *); void (*delete)(char *, IOR_param_t *); - char* (*get_version)(); + char* (*get_version)(void); void (*fsync)(void *, IOR_param_t *); IOR_offset_t (*get_file_size)(IOR_param_t *, MPI_Comm, char *); int (*statfs) (const char *, ior_aiori_statfs_t *, IOR_param_t * param); @@ -79,12 +82,22 @@ typedef struct ior_aiori { int (*rmdir) (const char *path, IOR_param_t * param); int (*access) (const char *path, int mode, IOR_param_t * param); int (*stat) (const char *path, struct stat *buf, IOR_param_t * param); - void (*initialize)(); /* called once per program before MPI is started */ - void (*finalize)(); /* called once per program after MPI is shutdown */ - option_help * (*get_options)(); + void (*initialize)(void); /* called once per program before MPI is started */ + void (*finalize)(void); /* called once per program after MPI is shutdown */ + option_help * (*get_options)(void ** 
init_backend_options, void* init_values); /* initializes the backend options as well and returns the pointer to the option help structure */ + bool enable_mdtest; + int (*check_params)(IOR_param_t *); /* check if the provided parameters for the given test and the module options are correct, if they aren't print a message and exit(1) or return 1*/ + void (*sync)(IOR_param_t * ); /* synchronize every pending operation for this storage */ } ior_aiori_t; +enum bench_type { + IOR, + MDTEST +}; + extern ior_aiori_t dummy_aiori; +extern ior_aiori_t daos_aiori; +extern ior_aiori_t dfs_aiori; extern ior_aiori_t hdf5_aiori; extern ior_aiori_t hdfs_aiori; extern ior_aiori_t ime_aiori; @@ -96,17 +109,22 @@ extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_emc_aiori; extern ior_aiori_t rados_aiori; +extern ior_aiori_t cephfs_aiori; +extern ior_aiori_t gfarm_aiori; -void aiori_initialize(); -void aiori_finalize(); +void aiori_initialize(IOR_test_t * tests); +void aiori_finalize(IOR_test_t * tests); const ior_aiori_t *aiori_select (const char *api); int aiori_count (void); -void aiori_supported_apis(char * APIs); -void airoi_parse_options(int argc, char ** argv, option_help * global_options); +void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type); +options_all_t * airoi_create_all_module_options(option_help * global_options); + +void * airoi_update_module_options(const ior_aiori_t * backend, options_all_t * module_defaults); + const char *aiori_default (void); /* some generic POSIX-based backend calls */ -char * aiori_get_version(); +char * aiori_get_version (void); int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param); int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param); int aiori_posix_rmdir (const char *path, IOR_param_t * param); @@ -114,10 +132,13 @@ int aiori_posix_access (const char *path, int mode, IOR_param_t * param); int aiori_posix_stat (const 
char *path, struct stat *buf, IOR_param_t * param); void *POSIX_Create(char *testFileName, IOR_param_t * param); +int POSIX_Mknod(char *testFileName); void *POSIX_Open(char *testFileName, IOR_param_t * param); IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName); void POSIX_Delete(char *testFileName, IOR_param_t * param); void POSIX_Close(void *fd, IOR_param_t * param); +option_help * POSIX_options(void ** init_backend_options, void * init_values); + /* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ void MPIIO_Delete(char *testFileName, IOR_param_t * param); diff --git a/src/ior-internal.h b/src/ior-internal.h index 11bbb8d1..9cc8406b 100644 --- a/src/ior-internal.h +++ b/src/ior-internal.h @@ -6,7 +6,6 @@ #define _IOR_INTERNAL_H /* Part of ior-output.c */ -void PrintEarlyHeader(); void PrintHeader(int argc, char **argv); void ShowTestStart(IOR_param_t *params); void ShowTestEnd(IOR_test_t *tptr); @@ -21,11 +20,15 @@ void PrintLongSummaryOneTest(IOR_test_t *test); void DisplayFreespace(IOR_param_t * test); void GetTestFileName(char *, IOR_param_t *); void PrintRemoveTiming(double start, double finish, int rep); -void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep); +void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency, + double *diff_subset, double totalTime, int rep); void PrintTestEnds(); void PrintTableHeader(); /* End of ior-output */ +IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank); +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access); + struct results { double min; double max; diff --git a/src/ior-output.c b/src/ior-output.c index 385e63a7..01136fbe 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -11,8 +11,6 @@ extern char **environ; -static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals); 
-static struct results *ops_values(int reps, IOR_results_t * measured, int offset, IOR_offset_t transfer_size, double *vals); static double mean_of_array_of_doubles(double *values, int len); static void PPDouble(int leftjustify, double number, char *append); static void PrintNextToken(); @@ -20,8 +18,8 @@ static void PrintNextToken(); void PrintTableHeader(){ if (outputFormat == OUTPUT_DEFAULT){ fprintf(out_resultfile, "\n"); - fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); - fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n"); + fprintf(out_resultfile, "access bw(MiB/s) IOPS Latency(s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); + fprintf(out_resultfile, "------ --------- ---- ---------- ---------- --------- -------- -------- -------- -------- ----\n"); } } @@ -153,6 +151,9 @@ static void PrintNamedArrayStart(char * key){ } static void PrintEndSection(){ + if (rank != 0) + return; + indent--; if(outputFormat == OUTPUT_JSON){ fprintf(out_resultfile, "\n"); @@ -163,6 +164,8 @@ static void PrintEndSection(){ } static void PrintArrayStart(){ + if (rank != 0) + return; PrintNextToken(); needNextToken = 0; if(outputFormat == OUTPUT_JSON){ @@ -171,6 +174,8 @@ static void PrintArrayStart(){ } static void PrintArrayNamedStart(char * key){ + if (rank != 0) + return; PrintNextToken(); needNextToken = 0; if(outputFormat == OUTPUT_JSON){ @@ -179,6 +184,9 @@ static void PrintArrayNamedStart(char * key){ } static void PrintArrayEnd(){ + if (rank != 0) + return; + indent--; if(outputFormat == OUTPUT_JSON){ fprintf(out_resultfile, "]\n"); @@ -187,10 +195,14 @@ static void PrintArrayEnd(){ } void PrintRepeatEnd(){ + if (rank != 0) + return; PrintArrayEnd(); } void PrintRepeatStart(){ + if (rank != 0) + return; if( outputFormat == OUTPUT_DEFAULT){ return; } @@ -207,10 +219,13 @@ void PrintTestEnds(){ PrintEndSection(); } -void 
PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep){ +void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency, + double *diff_subset, double totalTime, int rep){ if (outputFormat == OUTPUT_DEFAULT){ fprintf(out_resultfile, "%-10s", access == WRITE ? "write" : "read"); PPDouble(1, bw / MEBIBYTE, " "); + PPDouble(1, iops, " "); + PPDouble(1, latency, " "); PPDouble(1, (double)test->params.blockSize / KIBIBYTE, " "); PPDouble(1, (double)test->params.transferSize / KIBIBYTE, " "); PPDouble(1, diff_subset[0], " "); @@ -224,6 +239,8 @@ void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_su PrintKeyValDouble("bwMiB", bw / MEBIBYTE); PrintKeyValDouble("blockKiB", (double)test->params.blockSize / KIBIBYTE); PrintKeyValDouble("xferKiB", (double)test->params.transferSize / KIBIBYTE); + PrintKeyValDouble("iops", iops); + PrintKeyValDouble("latency", latency); PrintKeyValDouble("openTime", diff_subset[0]); PrintKeyValDouble("wrRdTime", diff_subset[1]); PrintKeyValDouble("closeTime", diff_subset[2]); @@ -233,20 +250,6 @@ void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_su fflush(out_resultfile); } - -/* - * Message to print immediately after MPI_Init so we know that - * ior has started. 
- */ -void PrintEarlyHeader() -{ - if (rank != 0) - return; - - fprintf(out_resultfile, "IOR-" META_VERSION ": MPI Coordinated Test of Parallel I/O\n"); - fflush(out_resultfile); -} - void PrintHeader(int argc, char **argv) { struct utsname unamebuf; @@ -254,8 +257,13 @@ void PrintHeader(int argc, char **argv) if (rank != 0) return; - PrintStartSection(); + PrintStartSection(); + if (outputFormat != OUTPUT_DEFAULT){ + PrintKeyVal("Version", META_VERSION); + }else{ + fprintf(out_resultfile, "IOR-" META_VERSION ": MPI Coordinated Test of Parallel I/O\n"); + } PrintKeyVal("Began", CurrentTimeString()); PrintKeyValStart("Command line"); fprintf(out_resultfile, "%s", argv[0]); @@ -315,7 +323,8 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("TestID", test->id); PrintKeyVal("StartTime", CurrentTimeString()); /* if pvfs2:, then skip */ - if (Regex(test->testFileName, "^[a-z][a-z].*:") == 0) { + if (strcasecmp(test->api, "DFS") && + Regex(test->testFileName, "^[a-z][a-z].*:") == 0) { DisplayFreespace(test); } @@ -335,10 +344,11 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("outlierThreshold", test->outlierThreshold); PrintKeyVal("options", test->options); - PrintKeyValInt("nodes", test->nodes); + PrintKeyValInt("dryRun", test->dryRun); + PrintKeyValInt("nodes", test->numNodes); PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask); PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode); - PrintKeyValInt("tasksPerNode", tasksPerNode); + PrintKeyValInt("tasksPerNode", test->numTasksOnNode0); PrintKeyValInt("repetitions", test->repetitions); PrintKeyValInt("multiFile", test->multiFile); PrintKeyValInt("interTestDelay", test->interTestDelay); @@ -363,7 +373,6 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("setAlignment", test->setAlignment); PrintKeyValInt("storeFileOffset", test->storeFileOffset); PrintKeyValInt("useSharedFilePointer", test->useSharedFilePointer); - PrintKeyValInt("useO_DIRECT", test->useO_DIRECT); 
PrintKeyValInt("useStridedDatatype", test->useStridedDatatype); PrintKeyValInt("keepFile", test->keepFile); PrintKeyValInt("keepFileWithError", test->keepFileWithError); @@ -387,10 +396,12 @@ void ShowTestStart(IOR_param_t *test) void ShowTestEnd(IOR_test_t *tptr){ if(rank == 0 && tptr->params.stoneWallingWearOut){ + + size_t pairs_accessed = tptr->results->write.pairs_accessed; if (tptr->params.stoneWallingStatusFile){ - StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed); + StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, pairs_accessed); }else{ - fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed); + fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %ld\n", pairs_accessed); } } PrintEndSection(); @@ -425,8 +436,9 @@ void ShowSetup(IOR_param_t *params) PrintKeyValInt("task offset", params->taskPerNodeOffset); PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed); } + PrintKeyValInt("nodes", params->numNodes); PrintKeyValInt("tasks", params->numTasks); - PrintKeyValInt("clients per node", params->tasksPerNode); + PrintKeyValInt("clients per node", params->numTasksOnNode0); if (params->memoryPerTask != 0){ PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO)); } @@ -437,12 +449,15 @@ void ShowSetup(IOR_param_t *params) PrintKeyVal("xfersize", HumanReadable(params->transferSize, BASE_TWO)); PrintKeyVal("blocksize", HumanReadable(params->blockSize, BASE_TWO)); PrintKeyVal("aggregate filesize", HumanReadable(params->expectedAggFileSize, BASE_TWO)); + if(params->dryRun){ + PrintKeyValInt("dryRun", params->dryRun); + } #ifdef HAVE_LUSTRE_LUSTRE_USER_H if (params->lustre_set_striping) { PrintKeyVal("Lustre stripe size", ((params->lustre_stripe_size == 0) ? "Use default" : HumanReadable(params->lustre_stripe_size, BASE_TWO))); - PrintKeyVal("stripe count", (params->lustre_stripe_count == 0 ? 
"Use default" : HumanReadable(params->lustre_stripe_count, BASE_TWO))); + PrintKeyValInt("Lustre stripe count", params->lustre_stripe_count); } #endif /* HAVE_LUSTRE_LUSTRE_USER_H */ if (params->deadlineForStonewalling > 0) { @@ -456,14 +471,63 @@ void ShowSetup(IOR_param_t *params) fflush(out_resultfile); } +static struct results *bw_ops_values(const int reps, IOR_results_t *measured, + IOR_offset_t transfer_size, + const double *vals, const int access) +{ + struct results *r; + int i; + + r = (struct results *)malloc(sizeof(struct results) + + (reps * sizeof(double))); + if (r == NULL) + ERR("malloc failed"); + r->val = (double *)&r[1]; + for (i = 0; i < reps; i++, measured++) { + IOR_point_t *point = (access == WRITE) ? &measured->write : + &measured->read; + + r->val[i] = ((double) (point->aggFileSizeForBW)) + / transfer_size / vals[i]; + + if (i == 0) { + r->min = r->val[i]; + r->max = r->val[i]; + r->sum = 0.0; + } + r->min = MIN(r->min, r->val[i]); + r->max = MAX(r->max, r->val[i]); + r->sum += r->val[i]; + } + r->mean = r->sum / reps; + r->var = 0.0; + for (i = 0; i < reps; i++) { + r->var += pow((r->mean - r->val[i]), 2); + } + r->var = r->var / reps; + r->sd = sqrt(r->var); + + return r; +} + +static struct results *bw_values(const int reps, IOR_results_t *measured, + const double *vals, const int access) +{ + return bw_ops_values(reps, measured, 1, vals, access); +} + +static struct results *ops_values(const int reps, IOR_results_t *measured, + IOR_offset_t transfer_size, + const double *vals, const int access) +{ + return bw_ops_values(reps, measured, transfer_size, vals, access); +} /* * Summarize results - * - * operation is typically "write" or "read" */ -static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation) +static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) { IOR_param_t *params = &test->params; IOR_results_t *results = test->results; @@ -477,15 +541,25 @@ static void 
PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha reps = params->repetitions; double * times = malloc(sizeof(double)* reps); + long long stonewall_avg_data_accessed = 0; + double stonewall_time = 0; for(int i=0; i < reps; i++){ - times[i] = *(double*)((char*) & results[i] + times_offset); + IOR_point_t *point = (access == WRITE) ? &results[i].write : + &results[i].read; + times[i] = point->time; + stonewall_time += point->stonewall_time; + stonewall_avg_data_accessed += point->stonewall_avg_data_accessed; } - bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times); - ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times); + bw = bw_values(reps, results, times, access); + ops = ops_values(reps, results, params->transferSize, times, access); + + IOR_point_t *point = (access == WRITE) ? &results[0].write : + &results[0].read; + if(outputFormat == OUTPUT_DEFAULT){ - fprintf(out_resultfile, "%-9s ", operation); + fprintf(out_resultfile, "%-9s ", access == WRITE ? 
"write" : "read"); fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE); fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE); fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE); @@ -495,9 +569,16 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha fprintf(out_resultfile, "%10.2f ", ops->mean); fprintf(out_resultfile, "%10.2f ", ops->sd); fprintf(out_resultfile, "%10.5f ", mean_of_array_of_doubles(times, reps)); + if(test->params.stoneWallingWearOut){ + fprintf(out_resultfile, "%10.2f ", stonewall_time / reps); + fprintf(out_resultfile, "%13.2f ", stonewall_avg_data_accessed / stonewall_time / MEBIBYTE); + }else{ + fprintf(out_resultfile, "%10s ", "NA"); + fprintf(out_resultfile, "%13s ", "NA"); + } fprintf(out_resultfile, "%5d ", params->id); fprintf(out_resultfile, "%6d ", params->numTasks); - fprintf(out_resultfile, "%3d ", params->tasksPerNode); + fprintf(out_resultfile, "%3d ", params->numTasksOnNode0); fprintf(out_resultfile, "%4d ", params->repetitions); fprintf(out_resultfile, "%3d ", params->filePerProc); fprintf(out_resultfile, "%5d ", params->reorderTasks); @@ -507,13 +588,13 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha fprintf(out_resultfile, "%6lld ", params->segmentCount); fprintf(out_resultfile, "%8lld ", params->blockSize); fprintf(out_resultfile, "%8lld ", params->transferSize); - fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE); + fprintf(out_resultfile, "%9.1f ", (float)point->aggFileSizeForBW / MEBIBYTE); fprintf(out_resultfile, "%3s ", params->api); fprintf(out_resultfile, "%6d", params->referenceNumber); fprintf(out_resultfile, "\n"); }else if (outputFormat == OUTPUT_JSON){ PrintStartSection(); - PrintKeyVal("operation", operation); + PrintKeyVal("operation", access == WRITE ? 
"write" : "read"); PrintKeyVal("API", params->api); PrintKeyValInt("TestID", params->id); PrintKeyValInt("ReferenceNumber", params->referenceNumber); @@ -521,7 +602,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha PrintKeyValInt("blockSize", params->blockSize); PrintKeyValInt("transferSize", params->transferSize); PrintKeyValInt("numTasks", params->numTasks); - PrintKeyValInt("tasksPerNode", params->tasksPerNode); + PrintKeyValInt("tasksPerNode", params->numTasksOnNode0); PrintKeyValInt("repetitions", params->repetitions); PrintKeyValInt("filePerProc", params->filePerProc); PrintKeyValInt("reorderTasks", params->reorderTasks); @@ -540,7 +621,11 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha PrintKeyValDouble("OPsMean", ops->mean); PrintKeyValDouble("OPsSD", ops->sd); PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps)); - PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE); + if(test->params.stoneWallingWearOut){ + PrintKeyValDouble("StoneWallTime", stonewall_time / reps); + PrintKeyValDouble("StoneWallbwMeanMIB", stonewall_avg_data_accessed / stonewall_time / MEBIBYTE); + } + PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE); PrintEndSection(); }else if (outputFormat == OUTPUT_CSV){ @@ -558,9 +643,9 @@ void PrintLongSummaryOneTest(IOR_test_t *test) IOR_param_t *params = &test->params; if (params->writeFile) - PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write"); + PrintLongSummaryOneOperation(test, WRITE); if (params->readFile) - PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read"); + PrintLongSummaryOneOperation(test, READ); } void PrintLongSummaryHeader() @@ -572,10 +657,10 @@ void PrintLongSummaryHeader() } fprintf(out_resultfile, "\n"); - fprintf(out_resultfile, "%-9s %10s %10s %10s %10s %10s %10s %10s %10s %10s", + fprintf(out_resultfile, "%-9s %10s %10s %10s %10s %10s 
%10s %10s %10s %10s %10s %13s", "Operation", "Max(MiB)", "Min(MiB)", "Mean(MiB)", "StdDev", "Max(OPs)", "Min(OPs)", "Mean(OPs)", "StdDev", - "Mean(s)"); + "Mean(s)", "Stonewall(s)", "Stonewall(MiB)"); fprintf(out_resultfile, " Test# #Tasks tPN reps fPP reord reordoff reordrand seed" " segcnt "); fprintf(out_resultfile, "%8s %8s %9s %5s", " blksiz", "xsize","aggs(MiB)", "API"); @@ -626,9 +711,9 @@ void PrintShortSummary(IOR_test_t * test) reps = params->repetitions; for (i = 0; i < reps; i++) { - bw = (double)results[i].aggFileSizeForBW / results[i].writeTime; + bw = (double)results[i].write.aggFileSizeForBW / results[i].write.time; max_write_bw = MAX(bw, max_write_bw); - bw = (double)results[i].aggFileSizeForBW / results[i].readTime; + bw = (double)results[i].read.aggFileSizeForBW / results[i].read.time; max_read_bw = MAX(bw, max_read_bw); } @@ -693,7 +778,7 @@ void PrintRemoveTiming(double start, double finish, int rep) return; if (outputFormat == OUTPUT_DEFAULT){ - fprintf(out_resultfile, "remove - - - - - - "); + fprintf(out_resultfile, "remove - - - - - - - - "); PPDouble(1, finish-start, " "); fprintf(out_resultfile, "%-4d\n", rep); }else if (outputFormat == OUTPUT_JSON){ @@ -735,78 +820,6 @@ static void PPDouble(int leftjustify, double number, char *append) fprintf(out_resultfile, format, number, append); } - - -static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals) -{ - struct results *r; - int i; - - r = (struct results *) malloc(sizeof(struct results) + (reps * sizeof(double))); - if (r == NULL) - ERR("malloc failed"); - r->val = (double *)&r[1]; - - for (i = 0; i < reps; i++, measured++) { - - r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) / vals[i]; - if (i == 0) { - r->min = r->val[i]; - r->max = r->val[i]; - r->sum = 0.0; - } - r->min = MIN(r->min, r->val[i]); - r->max = MAX(r->max, r->val[i]); - r->sum += r->val[i]; - } - r->mean = r->sum / reps; - r->var = 0.0; - for (i = 0; i < reps; i++) { 
- r->var += pow((r->mean - r->val[i]), 2); - } - r->var = r->var / reps; - r->sd = sqrt(r->var); - - return r; -} - -static struct results *ops_values(int reps, IOR_results_t * measured, int offset, - IOR_offset_t transfer_size, - double *vals) -{ - struct results *r; - int i; - - r = (struct results *)malloc(sizeof(struct results) - + (reps * sizeof(double))); - if (r == NULL) - ERR("malloc failed"); - r->val = (double *)&r[1]; - - for (i = 0; i < reps; i++, measured++) { - r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) - / transfer_size / vals[i]; - if (i == 0) { - r->min = r->val[i]; - r->max = r->val[i]; - r->sum = 0.0; - } - r->min = MIN(r->min, r->val[i]); - r->max = MAX(r->max, r->val[i]); - r->sum += r->val[i]; - } - r->mean = r->sum / reps; - r->var = 0.0; - for (i = 0; i < reps; i++) { - r->var += pow((r->mean - r->val[i]), 2); - } - r->var = r->var / reps; - r->sd = sqrt(r->var); - - return r; -} - - static double mean_of_array_of_doubles(double *values, int len) { double tot = 0.0; diff --git a/src/ior.c b/src/ior.c index 4664bae7..361a9a4c 100755 --- a/src/ior.c +++ b/src/ior.c @@ -20,6 +20,11 @@ #include #include #include + +#if defined(HAVE_STRINGS_H) +#include +#endif + #include /* struct stat */ #include @@ -36,6 +41,7 @@ #include "utilities.h" #include "parse_options.h" +#define IOR_NB_TIMERS 6 /* file scope globals */ extern char **environ; @@ -48,8 +54,9 @@ static char **ParseFileName(char *, int *); static void InitTests(IOR_test_t * , MPI_Comm); static void TestIoSys(IOR_test_t *); static void ValidateTests(IOR_param_t *); -static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers); -static void WriteTimes(IOR_param_t *, double **, int, int); +static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, + void *fd, const int access, + IOR_io_buffers *ioBuffers); IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out){ 
IOR_test_t *tests_head; @@ -58,9 +65,7 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out out_resultfile = world_out; mpi_comm_world = world_com; - MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), "cannot get number of tasks"); MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); - PrintEarlyHeader(); /* setup tests, and validate parameters */ tests_head = ParseCommandLine(argc, argv); @@ -71,14 +76,17 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out /* perform each test */ for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { + aiori_initialize(tptr); totalErrorCount = 0; verbose = tptr->params.verbose; + backend = tptr->params.backend; if (rank == 0 && verbose >= VERBOSE_0) { ShowTestStart(&tptr->params); } TestIoSys(tptr); tptr->results->errors = totalErrorCount; ShowTestEnd(tptr); + aiori_finalize(tptr); } PrintLongSummaryAllTests(tests_head); @@ -98,8 +106,6 @@ int ior_main(int argc, char **argv) out_logfile = stdout; out_resultfile = stdout; - aiori_initialize(); - /* * check -h option from commandline without starting MPI; */ @@ -109,12 +115,8 @@ int ior_main(int argc, char **argv) MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI"); mpi_comm_world = MPI_COMM_WORLD; - MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), - "cannot get number of tasks"); MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); - PrintEarlyHeader(); - /* set error-handling */ /*MPI_CHECK(MPI_Errhandler_set(mpi_comm_world, MPI_ERRORS_RETURN), "cannot set errhandler"); */ @@ -123,13 +125,17 @@ int ior_main(int argc, char **argv) InitTests(tests_head, mpi_comm_world); verbose = tests_head->params.verbose; + aiori_initialize(tests_head); // this is quite suspicious, likely an error when multiple tests need to be executed with different backends and options + PrintHeader(argc, argv); /* perform each test */ for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { 
verbose = tptr->params.verbose; + backend = tptr->params.backend; if (rank == 0 && verbose >= VERBOSE_0) { - ShowTestStart(&tptr->params); + backend = tptr->params.backend; + ShowTestStart(&tptr->params); } // This is useful for trapping a running MPI process. While @@ -139,6 +145,7 @@ int ior_main(int argc, char **argv) sleep(5); fprintf(out_logfile, "\trank %d: awake.\n", rank); } + TestIoSys(tptr); ShowTestEnd(tptr); } @@ -151,11 +158,11 @@ int ior_main(int argc, char **argv) /* display finish time */ PrintTestEnds(); - DestroyTests(tests_head); + aiori_finalize(tests_head); MPI_CHECK(MPI_Finalize(), "cannot finalize MPI"); - aiori_finalize(); + DestroyTests(tests_head); return totalErrorCount; } @@ -184,8 +191,14 @@ void init_IOR_Param_t(IOR_param_t * p) p->writeFile = p->readFile = FALSE; p->checkWrite = p->checkRead = FALSE; - p->nodes = 1; - p->tasksPerNode = 1; + /* + * These can be overridden from the command-line but otherwise will be + * set from MPI. + */ + p->numTasks = -1; + p->numNodes = -1; + p->numTasksOnNode0 = -1; + p->repetitions = 1; p->repCounter = -1; p->open = WRITE; @@ -214,8 +227,6 @@ void init_IOR_Param_t(IOR_param_t * p) p->beegfs_numTargets = -1; p->beegfs_chunkSize = -1; - - p->mmap_ptr = NULL; } static void @@ -246,8 +257,13 @@ DisplayOutliers(int numTasks, strcpy(accessString, "read"); } if (fabs(timerVal - mean) > (double)outlierThreshold) { - fprintf(out_logfile, "WARNING: for task %d, %s %s is %f\n", - rank, accessString, timeString, timerVal); + char hostname[MAX_STR]; + int ret = gethostname(hostname, MAX_STR); + if (ret != 0) + strcpy(hostname, "unknown"); + + fprintf(out_logfile, "WARNING: for %s, task %d, %s %s is %f\n", + hostname, rank, accessString, timeString, timerVal); fprintf(out_logfile, " (mean=%f, stddev=%f)\n", mean, sd); fflush(out_logfile); } @@ -256,62 +272,57 @@ DisplayOutliers(int numTasks, /* * Check for outliers in start/end times and elapsed create/xfer/close times. 
*/ -static void CheckForOutliers(IOR_param_t * test, double **timer, int rep, - int access) +static void +CheckForOutliers(IOR_param_t *test, const double *timer, const int access) { - int shift; - - if (access == WRITE) { - shift = 0; - } else { /* READ */ - shift = 6; - } - - DisplayOutliers(test->numTasks, timer[shift + 0][rep], + DisplayOutliers(test->numTasks, timer[0], "start time", access, test->outlierThreshold); DisplayOutliers(test->numTasks, - timer[shift + 1][rep] - timer[shift + 0][rep], + timer[1] - timer[0], "elapsed create time", access, test->outlierThreshold); DisplayOutliers(test->numTasks, - timer[shift + 3][rep] - timer[shift + 2][rep], + timer[3] - timer[2], "elapsed transfer time", access, test->outlierThreshold); DisplayOutliers(test->numTasks, - timer[shift + 5][rep] - timer[shift + 4][rep], + timer[5] - timer[4], "elapsed close time", access, test->outlierThreshold); - DisplayOutliers(test->numTasks, timer[shift + 5][rep], "end time", + DisplayOutliers(test->numTasks, timer[5], "end time", access, test->outlierThreshold); - } /* * Check if actual file size equals expected size; if not use actual for * calculating performance rate. */ -static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep) +static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, + const int access) { IOR_param_t *params = &test->params; IOR_results_t *results = test->results; + IOR_point_t *point = (access == WRITE) ? 
&results[rep].write : + &results[rep].read; - MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer, + MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); - if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { + if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0 && + strcasecmp(params->api, "DAOS") != 0) { if (verbose >= VERBOSE_0 && rank == 0) { if ((params->expectedAggFileSize - != results[rep].aggFileSizeFromXfer) - || (results[rep].aggFileSizeFromStat - != results[rep].aggFileSizeFromXfer)) { + != point->aggFileSizeFromXfer) + || (point->aggFileSizeFromStat + != point->aggFileSizeFromXfer)) { fprintf(out_logfile, "WARNING: Expected aggregate file size = %lld.\n", (long long) params->expectedAggFileSize); fprintf(out_logfile, "WARNING: Stat() of aggregate file size = %lld.\n", - (long long) results[rep].aggFileSizeFromStat); + (long long) point->aggFileSizeFromStat); fprintf(out_logfile, "WARNING: Using actual aggregate bytes moved = %lld.\n", - (long long) results[rep].aggFileSizeFromXfer); + (long long) point->aggFileSizeFromXfer); if(params->deadlineForStonewalling){ fprintf(out_logfile, "WARNING: maybe caused by deadlineForStonewalling\n"); @@ -319,7 +330,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep) } } } - results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer; + + point->aggFileSizeForBW = point->aggFileSizeFromXfer; } /* @@ -461,12 +473,16 @@ static int CountErrors(IOR_param_t * test, int access, int errors) */ static void *aligned_buffer_alloc(size_t size) { - size_t pageSize; size_t pageMask; char *buf, *tmp; char *aligned; - pageSize = getpagesize(); +#ifdef HAVE_SYSCONF + long pageSize = sysconf(_SC_PAGESIZE); +#else + size_t pageSize = getpagesize(); +#endif + pageMask = pageSize - 1; buf = malloc(size + pageSize + sizeof(void *)); if (buf == 
NULL) @@ -490,20 +506,11 @@ static void aligned_buffer_free(void *buf) free(*(void **)((char *)buf - sizeof(char *))); } -static void* safeMalloc(uint64_t size){ - void * d = malloc(size); - if (d == NULL){ - ERR("Could not malloc an array"); - } - memset(d, 0, size); - return d; -} - void AllocResults(IOR_test_t *test) { int reps; if (test->results != NULL) - return; + return; reps = test->params.repetitions; test->results = (IOR_results_t *) safeMalloc(sizeof(IOR_results_t) * reps); @@ -791,8 +798,7 @@ void GetTestFileName(char *testFileName, IOR_param_t * test) static char *PrependDir(IOR_param_t * test, char *rootDir) { char *dir; - char fname[MAX_STR + 1]; - char *p; + char *fname; int i; dir = (char *)malloc(MAX_STR + 1); @@ -812,34 +818,27 @@ static char *PrependDir(IOR_param_t * test, char *rootDir) } /* get file name */ - strcpy(fname, rootDir); - p = fname; - while (i > 0) { - if (fname[i] == '\0' || fname[i] == '/') { - p = fname + (i + 1); - break; - } - i--; - } + fname = rootDir + i + 1; /* create directory with rank as subdirectory */ - sprintf(dir, "%s%d", dir, (rank + rankOffset) % test->numTasks); + sprintf(dir + i + 1, "%d", (rank + rankOffset) % test->numTasks); /* dir doesn't exist, so create */ - if (access(dir, F_OK) != 0) { - if (mkdir(dir, S_IRWXU) < 0) { - ERR("cannot create directory"); + if (backend->access(dir, F_OK, test) != 0) { + if (backend->mkdir(dir, S_IRWXU, test) < 0) { + ERRF("cannot create directory: %s", dir); } /* check if correct permissions */ - } else if (access(dir, R_OK) != 0 || access(dir, W_OK) != 0 || - access(dir, X_OK) != 0) { - ERR("invalid directory permissions"); + } else if (backend->access(dir, R_OK, test) != 0 || + backend->access(dir, W_OK, test) != 0 || + backend->access(dir, X_OK, test) != 0) { + ERRF("invalid directory permissions: %s", dir); } /* concatenate dir and file names */ strcat(dir, "/"); - strcat(dir, p); + strcat(dir, fname); return dir; } @@ -848,54 +847,62 @@ static char 
*PrependDir(IOR_param_t * test, char *rootDir) /* * Reduce test results, and show if verbose set. */ - -static void ReduceIterResults(IOR_test_t *test, double **timer, int rep, - int access) +static void +ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int access) { - double reduced[12] = { 0 }; - double diff[6]; - double *diff_subset; - double totalTime; - double bw; - int i; - MPI_Op op; + double reduced[IOR_NB_TIMERS] = { 0 }; + double diff[IOR_NB_TIMERS / 2 + 1]; + double totalTime, accessTime; + IOR_param_t *params = &test->params; + double bw, iops, latency, minlatency; + int i; + MPI_Op op; - assert(access == WRITE || access == READ); + assert(access == WRITE || access == READ); /* Find the minimum start time of the even numbered timers, and the maximum finish time for the odd numbered timers */ - for (i = 0; i < 12; i++) { + for (i = 0; i < IOR_NB_TIMERS; i++) { op = i % 2 ? MPI_MAX : MPI_MIN; - MPI_CHECK(MPI_Reduce(&timer[i][rep], &reduced[i], 1, MPI_DOUBLE, + MPI_CHECK(MPI_Reduce(&timer[i], &reduced[i], 1, MPI_DOUBLE, op, 0, testComm), "MPI_Reduce()"); } - if (rank != 0) { - /* Only rank 0 tallies and prints the results. */ - return; - } + /* Calculate elapsed times and throughput numbers */ + for (i = 0; i < IOR_NB_TIMERS / 2; i++) + diff[i] = reduced[2 * i + 1] - reduced[2 * i]; - /* Calculate elapsed times and throughput numbers */ - for (i = 0; i < 6; i++) { - diff[i] = reduced[2 * i + 1] - reduced[2 * i]; - } - if (access == WRITE) { - totalTime = reduced[5] - reduced[0]; - test->results[rep].writeTime = totalTime; - diff_subset = &diff[0]; - } else { /* READ */ - totalTime = reduced[11] - reduced[6]; - test->results[rep].readTime = totalTime; - diff_subset = &diff[3]; - } + totalTime = reduced[5] - reduced[0]; + accessTime = reduced[3] - reduced[2]; - if (verbose < VERBOSE_0) { - return; - } + IOR_point_t *point = (access == WRITE) ? 
&test->results[rep].write : + &test->results[rep].read; + + point->time = totalTime; + + if (verbose < VERBOSE_0) + return; + + bw = (double)point->aggFileSizeForBW / totalTime; + + /* For IOPS in this iteration, we divide the total amount of IOs from + * all ranks over the entire access time (first start -> last end). */ + iops = (point->aggFileSizeForBW / params->transferSize) / accessTime; - bw = (double)test->results[rep].aggFileSizeForBW / totalTime; + /* For Latency, we divide the total access time for each task over the + * number of I/Os issued from that task; then reduce and display the + * minimum (best) latency achieved. So what is reported is the average + * latency of all ops from a single task, then taking the minimum of + * that between all tasks. */ + latency = (timer[3] - timer[2]) / (params->blockSize / params->transferSize); + MPI_CHECK(MPI_Reduce(&latency, &minlatency, 1, MPI_DOUBLE, + MPI_MIN, 0, testComm), "MPI_Reduce()"); - PrintReducedResult(test, access, bw, diff_subset, totalTime, rep); + /* Only rank 0 tallies and prints the results. 
*/ + if (rank != 0) + return; + + PrintReducedResult(test, access, bw, iops, latency, diff, totalTime, rep); } /* @@ -913,6 +920,10 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) GetTestFileName(testFileName, test); } if (backend->access(testFileName, F_OK, test) == 0) { + if (verbose >= VERBOSE_3) { + fprintf(out_logfile, "task %d removing %s\n", rank, + testFileName); + } backend->delete(testFileName, test); } if (test->reorderTasksRandom == TRUE) { @@ -921,6 +932,10 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) } } else { if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0)) { + if (verbose >= VERBOSE_3) { + fprintf(out_logfile, "task %d removing %s\n", rank, + testFileName); + } backend->delete(testFileName, test); } } @@ -932,12 +947,17 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) */ static void InitTests(IOR_test_t *tests, MPI_Comm com) { - int size; - - MPI_CHECK(MPI_Comm_size(com, & size), "MPI_Comm_size() error"); + int mpiNumNodes = 0; + int mpiNumTasks = 0; + int mpiNumTasksOnNode0 = 0; - /* count the tasks per node */ - tasksPerNode = CountTasksPerNode(com); + /* + * These default values are the same for every test and expensive to + * retrieve so just do it once. 
+ */ + mpiNumNodes = GetNumNodes(com); + mpiNumTasks = GetNumTasks(com); + mpiNumTasksOnNode0 = GetNumTasksOnNode0(com); /* * Since there is no guarantee that anyone other than @@ -950,11 +970,28 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) while (tests != NULL) { IOR_param_t *params = & tests->params; params->testComm = com; - params->nodes = params->numTasks / tasksPerNode; - params->tasksPerNode = tasksPerNode; - if (params->numTasks == 0) { - params->numTasks = size; + + /* use MPI values if not overridden on command-line */ + if (params->numNodes == -1) { + params->numNodes = mpiNumNodes; + } + if (params->numTasks == -1) { + params->numTasks = mpiNumTasks; + } else if (params->numTasks > mpiNumTasks) { + if (rank == 0) { + fprintf(out_logfile, + "WARNING: More tasks requested (%d) than available (%d),", + params->numTasks, mpiNumTasks); + fprintf(out_logfile, " running with %d tasks.\n", + mpiNumTasks); + } + params->numTasks = mpiNumTasks; } + if (params->numTasksOnNode0 == -1) { + params->numTasksOnNode0 = mpiNumTasksOnNode0; + } + + params->tasksBlockMapping = QueryNodeMapping(com,false); params->expectedAggFileSize = params->blockSize * params->segmentCount * params->numTasks; @@ -1102,7 +1139,7 @@ static void *HogMemory(IOR_param_t *params) if (verbose >= VERBOSE_3) fprintf(out_logfile, "This node hogging %ld bytes of memory\n", params->memoryPerNode); - size = params->memoryPerNode / params->tasksPerNode; + size = params->memoryPerNode / params->numTasksOnNode0; } else { return NULL; } @@ -1116,7 +1153,72 @@ static void *HogMemory(IOR_param_t *params) return buf; } +/* + * Write times taken during each iteration of the test. 
+ */ +static void +WriteTimes(IOR_param_t *test, const double *timer, const int iteration, + const int access) +{ + char timerName[MAX_STR]; + for (int i = 0; i < IOR_NB_TIMERS; i++) { + + if (access == WRITE) { + switch (i) { + case 0: + strcpy(timerName, "write open start"); + break; + case 1: + strcpy(timerName, "write open stop"); + break; + case 2: + strcpy(timerName, "write start"); + break; + case 3: + strcpy(timerName, "write stop"); + break; + case 4: + strcpy(timerName, "write close start"); + break; + case 5: + strcpy(timerName, "write close stop"); + break; + default: + strcpy(timerName, "invalid timer"); + break; + } + } + else { + switch (i) { + case 0: + strcpy(timerName, "read open start"); + break; + case 1: + strcpy(timerName, "read open stop"); + break; + case 2: + strcpy(timerName, "read start"); + break; + case 3: + strcpy(timerName, "read stop"); + break; + case 4: + strcpy(timerName, "read close start"); + break; + case 5: + strcpy(timerName, "read close stop"); + break; + default: + strcpy(timerName, "invalid timer"); + break; + } + } + fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n", + test->id, iteration, (int)rank, timer[i], + timerName); + } +} /* * Using the test parameters, run iteration(s) of single test. 
*/ @@ -1125,10 +1227,10 @@ static void TestIoSys(IOR_test_t *test) IOR_param_t *params = &test->params; IOR_results_t *results = test->results; char testFileName[MAX_STR]; - double *timer[12]; + double timer[IOR_NB_TIMERS]; double startTime; int pretendRank; - int i, rep; + int rep; void *fd; MPI_Group orig_group, new_group; int range[3]; @@ -1137,16 +1239,6 @@ static void TestIoSys(IOR_test_t *test) IOR_io_buffers ioBuffers; /* set up communicator for test */ - if (params->numTasks > numTasksWorld) { - if (rank == 0) { - fprintf(out_logfile, - "WARNING: More tasks requested (%d) than available (%d),", - params->numTasks, numTasksWorld); - fprintf(out_logfile, " running on %d tasks.\n", - numTasksWorld); - } - params->numTasks = numTasksWorld; - } MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group), "MPI_Comm_group() error"); range[0] = 0; /* first rank */ @@ -1170,24 +1262,9 @@ static void TestIoSys(IOR_test_t *test) } if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) { fprintf(out_logfile, - "Using reorderTasks '-C' (expecting block, not cyclic, task assignment)\n"); + "Using reorderTasks '-C' (useful to avoid read cache in client)\n"); fflush(out_logfile); } - params->tasksPerNode = CountTasksPerNode(testComm); - - /* setup timers */ - for (i = 0; i < 12; i++) { - timer[i] = (double *)malloc(params->repetitions * sizeof(double)); - if (timer[i] == NULL) - ERR("malloc failed"); - } - - /* bind I/O calls to specific API */ - backend = aiori_select(params->api); - if (backend == NULL) - ERR_SIMPLE("unrecognized I/O API"); - - /* show test setup */ if (rank == 0 && verbose >= VERBOSE_0) ShowSetup(params); @@ -1199,7 +1276,7 @@ static void TestIoSys(IOR_test_t *test) /* IO Buffer Setup */ if (params->setTimeStampSignature) { // initialize the buffer properly - params->timeStampSignatureValue = (unsigned int)params->setTimeStampSignature; + params->timeStampSignatureValue = (unsigned int) params->setTimeStampSignature; } 
XferBuffersSetup(&ioBuffers, params, pretendRank); reseed_incompressible_prng = TRUE; // reset pseudo random generator, necessary to guarantee the next call to FillBuffer produces the same value as it is right now @@ -1220,7 +1297,7 @@ static void TestIoSys(IOR_test_t *test) ERR("cannot get current time"); } params->timeStampSignatureValue = - (unsigned int)currentTime; + (unsigned int) currentTime; if (verbose >= VERBOSE_2) { fprintf(out_logfile, "Using Time Stamp %u (0x%x) for Data Signature\n", @@ -1260,9 +1337,9 @@ static void TestIoSys(IOR_test_t *test) params->stoneWallingWearOutIterations = params_saved_wearout; MPI_CHECK(MPI_Barrier(testComm), "barrier error"); params->open = WRITE; - timer[0][rep] = GetTimeStamp(); + timer[0] = GetTimeStamp(); fd = backend->create(testFileName, params); - timer[1][rep] = GetTimeStamp(); + timer[1] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), "barrier error"); @@ -1271,40 +1348,40 @@ static void TestIoSys(IOR_test_t *test) "Commencing write performance test: %s", CurrentTimeString()); } - timer[2][rep] = GetTimeStamp(); - dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers); + timer[2] = GetTimeStamp(); + dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers); if (params->verbose >= VERBOSE_4) { fprintf(out_logfile, "* data moved = %llu\n", dataMoved); fflush(out_logfile); } - timer[3][rep] = GetTimeStamp(); + timer[3] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - timer[4][rep] = GetTimeStamp(); + timer[4] = GetTimeStamp(); backend->close(fd, params); - timer[5][rep] = GetTimeStamp(); + timer[5] = GetTimeStamp(); MPI_CHECK(MPI_Barrier(testComm), "barrier error"); /* get the size of the file just written */ - results[rep].aggFileSizeFromStat = + results[rep].write.aggFileSizeFromStat = backend->get_file_size(params, testComm, testFileName); /* check if stat() of file doesn't equal expected file size, 
use actual amount of byte moved */ - CheckFileSize(test, dataMoved, rep); + CheckFileSize(test, dataMoved, rep, WRITE); if (verbose >= VERBOSE_3) WriteTimes(params, timer, rep, WRITE); ReduceIterResults(test, timer, rep, WRITE); if (params->outlierThreshold) { - CheckForOutliers(params, timer, rep, WRITE); + CheckForOutliers(params, timer, WRITE); } /* check if in this round we run write with stonewalling */ if(params->deadlineForStonewalling > 0){ - params->stoneWallingWearOutIterations = results[rep].pairs_accessed; + params->stoneWallingWearOutIterations = results[rep].write.pairs_accessed; } } @@ -1321,7 +1398,11 @@ static void TestIoSys(IOR_test_t *test) } if (params->reorderTasks) { /* move two nodes away from writing node */ - rankOffset = (2 * params->tasksPerNode) % params->numTasks; + int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */ + if (params->tasksBlockMapping) { + shift = params->numTasksOnNode0; /* switch to by-slot (contiguous block) mapping */ + } + rankOffset = (2 * shift) % params->numTasks; } // update the check buffer @@ -1332,7 +1413,7 @@ static void TestIoSys(IOR_test_t *test) GetTestFileName(testFileName, params); params->open = WRITECHECK; fd = backend->open(testFileName, params); - dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers); + dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers); backend->close(fd, params); rankOffset = 0; } @@ -1344,7 +1425,7 @@ static void TestIoSys(IOR_test_t *test) if(params->stoneWallingStatusFile){ params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile); if(params->stoneWallingWearOutIterations == -1 && rank == 0){ - fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!"); + fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n"); params->stoneWallingWearOutIterations = 0; } } @@ -1356,18 +1437,21 @@ static void 
TestIoSys(IOR_test_t *test) /* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */ /* Constant process offset reading */ if (params->reorderTasks) { - /* move taskPerNodeOffset nodes[1==default] away from writing node */ - rankOffset = (params->taskPerNodeOffset * - params->tasksPerNode) % params->numTasks; + /* move one node away from writing node */ + int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */ + if (params->tasksBlockMapping) { + shift=params->numTasksOnNode0; /* switch to a by-slot (contiguous block) mapping */ + } + rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks; } /* random process offset reading */ if (params->reorderTasksRandom) { /* this should not intefere with randomOffset within a file because GetOffsetArrayRandom */ - /* seeds every random() call */ + /* seeds every rand() call */ int nodeoffset; unsigned int iseed0; nodeoffset = params->taskPerNodeOffset; - nodeoffset = (nodeoffset < params->nodes) ? nodeoffset : params->nodes - 1; + nodeoffset = (nodeoffset < params->numNodes) ? 
nodeoffset : params->numNodes - 1; if (params->reorderTasksRandomSeed < 0) iseed0 = -1 * params->reorderTasksRandomSeed + rep; else @@ -1377,7 +1461,7 @@ static void TestIoSys(IOR_test_t *test) rankOffset = rand() % params->numTasks; } while (rankOffset < - (nodeoffset * params->tasksPerNode)) { + (nodeoffset * params->numTasksOnNode0)) { rankOffset = rand() % params->numTasks; } /* Get more detailed stats if requested by verbose level */ @@ -1399,41 +1483,41 @@ static void TestIoSys(IOR_test_t *test) DelaySecs(params->interTestDelay); MPI_CHECK(MPI_Barrier(testComm), "barrier error"); params->open = READ; - timer[6][rep] = GetTimeStamp(); + timer[0] = GetTimeStamp(); fd = backend->open(testFileName, params); - timer[7][rep] = GetTimeStamp(); + timer[1] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), "barrier error"); if (rank == 0 && verbose >= VERBOSE_1) { fprintf(out_logfile, - "Commencing read performance test: %s", + "Commencing read performance test: %s\n", CurrentTimeString()); } - timer[8][rep] = GetTimeStamp(); - dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers); - timer[9][rep] = GetTimeStamp(); + timer[2] = GetTimeStamp(); + dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers); + timer[3] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - timer[10][rep] = GetTimeStamp(); + timer[4] = GetTimeStamp(); backend->close(fd, params); - timer[11][rep] = GetTimeStamp(); + timer[5] = GetTimeStamp(); /* get the size of the file just read */ - results[rep].aggFileSizeFromStat = + results[rep].read.aggFileSizeFromStat = backend->get_file_size(params, testComm, testFileName); /* check if stat() of file doesn't equal expected file size, use actual amount of byte moved */ - CheckFileSize(test, dataMoved, rep); + CheckFileSize(test, dataMoved, rep, READ); if (verbose >= VERBOSE_3) WriteTimes(params, timer, rep, READ); 
ReduceIterResults(test, timer, rep, READ); if (params->outlierThreshold) { - CheckForOutliers(params, timer, rep, READ); + CheckForOutliers(params, timer, READ); } } @@ -1468,9 +1552,6 @@ static void TestIoSys(IOR_test_t *test) if (hog_buf != NULL) free(hog_buf); - for (i = 0; i < 12; i++) { - free(timer[i]); - } /* Sync with the tasks that did not participate in this test */ MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); @@ -1544,7 +1625,11 @@ static void ValidateTests(IOR_param_t * test) && (strcasecmp(test->api, "MPIIO") != 0) && (strcasecmp(test->api, "MMAP") != 0) && (strcasecmp(test->api, "HDFS") != 0) - && (strcasecmp(test->api, "RADOS") != 0)) && test->fsync) + && (strcasecmp(test->api, "DFS") != 0) + && (strcasecmp(test->api, "DAOS") != 0) + && (strcasecmp(test->api, "Gfarm") != 0) + && (strcasecmp(test->api, "RADOS") != 0) + && (strcasecmp(test->api, "CEPHFS") != 0)) && test->fsync) WARN_RESET("fsync() not supported in selected backend", test, &defaults, fsync); if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate) @@ -1623,11 +1708,8 @@ static void ValidateTests(IOR_param_t * test) #if (H5_VERS_MAJOR > 0 && H5_VERS_MINOR > 5) ; #else - char errorString[MAX_STR]; - sprintf(errorString, - "'no fill' option not available in %s", + ERRF("'no fill' option not available in %s", test->apiVersion); - ERR(errorString); #endif #else WARN("unable to determine HDF5 version for 'no fill' usage"); @@ -1637,15 +1719,12 @@ static void ValidateTests(IOR_param_t * test) if (test->useExistingTestFile && test->lustre_set_striping) ERR("Lustre stripe options are incompatible with useExistingTestFile"); - /* N:1 and N:N */ - IOR_offset_t NtoN = test->filePerProc; - IOR_offset_t Nto1 = ! 
NtoN; - IOR_offset_t s = test->segmentCount; - IOR_offset_t t = test->transferSize; - IOR_offset_t b = test->blockSize; - - if (Nto1 && (s != 1) && (b != t)) { - ERR("N:1 (strided) requires xfer-size == block-size"); + /* allow the backend to validate the options */ + if(test->backend->check_params){ + int check = test->backend->check_params(test); + if (check == 0){ + ERR("The backend returned that the test parameters are invalid."); + } } } @@ -1656,8 +1735,7 @@ static void ValidateTests(IOR_param_t * test) * @param pretendRank int pretended Rank for shifting the offsest corectly * @return IOR_offset_t */ -static IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, - int pretendRank) +IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) { IOR_offset_t i, j, k = 0; IOR_offset_t offsets; @@ -1706,8 +1784,7 @@ static IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, * @return IOR_offset_t * @return */ -static IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, - int access) +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access) { int seed; IOR_offset_t i, value, tmp; @@ -1718,11 +1795,11 @@ static IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, /* set up seed for random() */ if (access == WRITE || access == READ) { - test->randomSeed = seed = random(); + test->randomSeed = seed = rand(); } else { seed = test->randomSeed; } - srandom(seed); + srand(seed); fileSize = test->blockSize * test->segmentCount; if (test->filePerProc == FALSE) { @@ -1734,7 +1811,7 @@ static IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, if (test->filePerProc == FALSE) { // this counts which process get how many transferes in // a shared file - if ((random() % test->numTasks) == pretendRank) { + if ((rand() % test->numTasks) == pretendRank) { offsets++; } } else { @@ -1756,9 +1833,9 @@ static IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int 
pretendRank, } } else { /* fill with offsets (pass 2) */ - srandom(seed); /* need same seed to get same transfers as counted in the beginning*/ + srand(seed); /* need same seed to get same transfers as counted in the beginning*/ for (i = 0; i < fileSize; i += test->transferSize) { - if ((random() % test->numTasks) == pretendRank) { + if ((rand() % test->numTasks) == pretendRank) { offsetArray[offsetCnt] = i; offsetCnt++; } @@ -1766,7 +1843,7 @@ static IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, } /* reorder array */ for (i = 0; i < offsets; i++) { - value = random() % offsets; + value = rand() % offsets; tmp = offsetArray[value]; offsetArray[value] = offsetArray[i]; offsetArray[i] = tmp; @@ -1798,11 +1875,19 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset backend->xfer(access, fd, buffer, transfer, test); if (amtXferred != transfer) ERR("cannot write to file"); + if (test->interIODelay > 0){ + struct timespec wait = {test->interIODelay / 1000 / 1000, 1000l * (test->interIODelay % 1000000)}; + nanosleep( & wait, NULL); + } } else if (access == READ) { amtXferred = backend->xfer(access, fd, buffer, transfer, test); if (amtXferred != transfer) ERR("cannot read from file"); + if (test->interIODelay > 0){ + struct timespec wait = {test->interIODelay / 1000 / 1000, 1000l * (test->interIODelay % 1000000)}; + nanosleep( & wait, NULL); + } } else if (access == WRITECHECK) { memset(checkBuffer, 'a', transfer); @@ -1818,14 +1903,16 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset *transferCount, test, WRITECHECK); } else if (access == READCHECK) { - amtXferred = backend->xfer(access, fd, buffer, transfer, test); + memset(checkBuffer, 'a', transfer); + + amtXferred = backend->xfer(access, fd, checkBuffer, transfer, test); if (amtXferred != transfer){ ERR("cannot read from file"); } if (test->storeFileOffset == TRUE) { FillBuffer(readCheckBuffer, test, test->offset, pretendRank); 
} - *errors += CompareBuffers(readCheckBuffer, buffer, transfer, *transferCount, test, READCHECK); + *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer, *transferCount, test, READCHECK); } return amtXferred; } @@ -1834,7 +1921,8 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset * Write or Read data to file(s). This loops through the strides, writing * out the data to each block in transfer sizes, until the remainder left is 0. */ -static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers) +static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, + void *fd, const int access, IOR_io_buffers *ioBuffers) { int errors = 0; IOR_offset_t transferCount = 0; @@ -1844,6 +1932,8 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi IOR_offset_t dataMoved = 0; /* for data rate calculation */ double startForStonewall; int hitStonewall; + IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ? 
+ &results->write : &results->read; /* initialize values */ pretendRank = (rank + rankOffset) % test->numTasks; @@ -1865,6 +1955,13 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi hitStonewall = ((test->deadlineForStonewalling != 0 && (GetTimeStamp() - startForStonewall) > test->deadlineForStonewalling)) || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; + + if ( test->collective && test->deadlineForStonewalling ) { + // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop + // it absolutely must be an 'all or none': + MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); + } + } if (test->stoneWallingWearOut){ if (verbose >= VERBOSE_1){ @@ -1872,35 +1969,31 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi } long long data_moved_ll = (long long) dataMoved; long long pairs_accessed_min = 0; - MPI_CHECK(MPI_Allreduce(& pairCnt, &results->pairs_accessed, + MPI_CHECK(MPI_Allreduce(& pairCnt, &point->pairs_accessed, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved"); double stonewall_runtime = GetTimeStamp() - startForStonewall; - results->stonewall_time = stonewall_runtime; + point->stonewall_time = stonewall_runtime; MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); - MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_min_data_accessed, + MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); - MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_avg_data_accessed, + MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved"); if(rank == 0){ fprintf(out_logfile, "stonewalling pairs 
accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n", - pairs_accessed_min, results->pairs_accessed, - results->stonewall_min_data_accessed /1024.0 / 1024 / 1024, results->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , results->stonewall_time); - results->stonewall_min_data_accessed *= test->numTasks; - } - if(pairs_accessed_min == pairCnt){ - results->stonewall_min_data_accessed = 0; - results->stonewall_avg_data_accessed = 0; + pairs_accessed_min, point->pairs_accessed, + point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time); + point->stonewall_min_data_accessed *= test->numTasks; } - if(pairCnt != results->pairs_accessed){ + if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! - for(; pairCnt < results->pairs_accessed; pairCnt++ ) { + for(; pairCnt < point->pairs_accessed; pairCnt++ ) { dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); } } }else{ - results->pairs_accessed = pairCnt; + point->pairs_accessed = pairCnt; } @@ -1913,73 +2006,3 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi } return (dataMoved); } - -/* - * Write times taken during each iteration of the test. 
- */ -static void -WriteTimes(IOR_param_t * test, double **timer, int iteration, int writeOrRead) -{ - char accessType[MAX_STR]; - char timerName[MAX_STR]; - int i, start = 0, stop = 0; - - if (writeOrRead == WRITE) { - start = 0; - stop = 6; - strcpy(accessType, "WRITE"); - } else if (writeOrRead == READ) { - start = 6; - stop = 12; - strcpy(accessType, "READ"); - } else { - ERR("incorrect WRITE/READ option"); - } - - for (i = start; i < stop; i++) { - switch (i) { - case 0: - strcpy(timerName, "write open start"); - break; - case 1: - strcpy(timerName, "write open stop"); - break; - case 2: - strcpy(timerName, "write start"); - break; - case 3: - strcpy(timerName, "write stop"); - break; - case 4: - strcpy(timerName, "write close start"); - break; - case 5: - strcpy(timerName, "write close stop"); - break; - case 6: - strcpy(timerName, "read open start"); - break; - case 7: - strcpy(timerName, "read open stop"); - break; - case 8: - strcpy(timerName, "read start"); - break; - case 9: - strcpy(timerName, "read stop"); - break; - case 10: - strcpy(timerName, "read close start"); - break; - case 11: - strcpy(timerName, "read close stop"); - break; - default: - strcpy(timerName, "invalid timer"); - break; - } - fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n", - test->id, iteration, (int)rank, timer[i][iteration], - timerName); - } -} diff --git a/src/ior.h b/src/ior.h index c283be79..758b0486 100755 --- a/src/ior.h +++ b/src/ior.h @@ -35,6 +35,7 @@ typedef void *rados_t; typedef void *rados_ioctx_t; #endif +#include "option.h" #include "iordef.h" /******************** DATA Packet Type ***************************************/ @@ -77,9 +78,11 @@ typedef struct IO_BUFFERS * USER_GUIDE */ +struct ior_aiori; + typedef struct { - const void * backend; + const struct ior_aiori * backend; char * debug; /* debug info string */ unsigned int mode; /* file permissions */ unsigned int openFlags; /* open flags (see also ) */ @@ -91,13 +94,17 @@ typedef struct char 
* testFileName_fppReadCheck;/* filename for fpp read check */ char * hintsFileName; /* full name for hints file */ char * options; /* options string */ + // intermediate options + int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ int numTasks; /* number of tasks for test */ - int nodes; /* number of nodes for test */ - int tasksPerNode; /* number of tasks per node */ + int numNodes; /* number of nodes for test */ + int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */ + int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */ int repetitions; /* number of repetitions of test */ int repCounter; /* rep counter */ int multiFile; /* multiple files */ int interTestDelay; /* delay between reps in seconds */ + int interIODelay; /* delay after each I/O in us */ int open; /* flag for writing or reading */ int readFile; /* read of existing file */ int writeFile; /* write of file */ @@ -122,7 +129,6 @@ typedef struct int useFileView; /* use MPI_File_set_view */ int useSharedFilePointer; /* use shared file pointer */ int useStridedDatatype; /* put strided access into datatype */ - int useO_DIRECT; /* use O_DIRECT, bypassing I/O buffers */ int showHints; /* show hints */ int summary_every_test; /* flag to print summary every test, not just at end */ int uniqueDir; /* use unique directory for each fpp */ @@ -144,16 +150,18 @@ typedef struct int randomOffset; /* access is to random offsets */ size_t memoryPerTask; /* additional memory used per task */ size_t memoryPerNode; /* additional memory used per node */ - enum PACKET_TYPE dataPacketType; /* The type of data packet. */ + char * memoryPerNodeStr; /* for parsing */ + char * testscripts; /* for parsing */ + char * buffer_type; /* for parsing */ + enum PACKET_TYPE dataPacketType; /* The type of data packet. 
*/ + void * backend_options; /* Backend-specific options */ /* POSIX variables */ int singleXferAttempt; /* do not retry transfer if incomplete */ int fsyncPerWrite; /* fsync() after each write */ int fsync; /* fsync() after write */ - void* mmap_ptr; - /* MPI variables */ MPI_Comm testComm; /* MPI communicator */ MPI_Datatype transferType; /* datatype for transfer */ @@ -202,12 +210,9 @@ typedef struct int intraTestBarriers; /* barriers between open/op and op/close */ } IOR_param_t; -/* each pointer is to an array, each of length equal to the number of - repetitions in the test */ +/* each pointer for a single test */ typedef struct { - double writeTime; - double readTime; - int errors; + double time; size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling double stonewall_time; @@ -217,20 +222,25 @@ typedef struct { IOR_offset_t aggFileSizeFromStat; IOR_offset_t aggFileSizeFromXfer; IOR_offset_t aggFileSizeForBW; +} IOR_point_t; + +typedef struct { + int errors; + IOR_point_t write; + IOR_point_t read; } IOR_results_t; /* define the queuing structure for the test parameters */ typedef struct IOR_test_t { IOR_param_t params; - IOR_results_t *results; /* This is an array of reps times IOR_results_t */ + IOR_results_t *results; struct IOR_test_t *next; } IOR_test_t; - IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num); void AllocResults(IOR_test_t *test); -char * GetPlatformName(); +char * GetPlatformName(void); void init_IOR_Param_t(IOR_param_t *p); /* diff --git a/src/iordef.h b/src/iordef.h index 23febd5a..78cf1d39 100755 --- a/src/iordef.h +++ b/src/iordef.h @@ -45,7 +45,7 @@ # define srandom srand # define random() (rand() * (RAND_MAX+1) + rand()) /* Note: only 30 bits */ # define sleep(X) Sleep((X)*1000) -# define getpagesize() 4096 +# define sysconf(X) 4096 #else # include /* MAXPATHLEN */ # include @@ -96,7 +96,6 @@ enum OutputFormat_t{ #define WRITECHECK 1 #define READ 2 #define READCHECK 3 -#define CHECK 4 /* verbosity 
settings */ #define VERBOSE_0 0 @@ -152,28 +151,41 @@ typedef long long int IOR_size_t; fflush(stdout); \ } while (0) -/* warning with errno printed */ -#define EWARN(MSG) do { \ + +/* warning with format string and errno printed */ +#define EWARNF(FORMAT, ...) do { \ if (verbose > VERBOSE_2) { \ - fprintf(stdout, "ior WARNING: %s, errno %d, %s (%s:%d).\n", \ - MSG, errno, strerror(errno), __FILE__, __LINE__); \ + fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s (%s:%d).\n", \ + __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ } else { \ - fprintf(stdout, "ior WARNING: %s, errno %d, %s \n", \ - MSG, errno, strerror(errno)); \ + fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s \n", \ + __VA_ARGS__, errno, strerror(errno)); \ } \ fflush(stdout); \ } while (0) -/* display error message and terminate execution */ -#define ERR(MSG) do { \ - fprintf(stdout, "ior ERROR: %s, errno %d, %s (%s:%d)\n", \ - MSG, errno, strerror(errno), __FILE__, __LINE__); \ +/* warning with errno printed */ +#define EWARN(MSG) do { \ + EWARNF("%s", MSG); \ +} while (0) + + +/* display error message with format string and terminate execution */ +#define ERRF(FORMAT, ...) do { \ + fprintf(stdout, "ior ERROR: " FORMAT ", errno %d, %s (%s:%d)\n", \ + __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ fflush(stdout); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } while (0) +/* display error message and terminate execution */ +#define ERR(MSG) do { \ + ERRF("%s", MSG); \ +} while (0) + + /* display a simple error message (i.e. 
errno is not set) and terminate execution */ #define ERR_SIMPLE(MSG) do { \ fprintf(stdout, "ior ERROR: %s, (%s:%d)\n", \ @@ -185,24 +197,35 @@ typedef long long int IOR_size_t; /******************************************************************************/ /* - * MPI_CHECK will display a custom error message as well as an error string + * MPI_CHECKF will display a custom format string as well as an error string * from the MPI_STATUS and then exit the program */ -#define MPI_CHECK(MPI_STATUS, MSG) do { \ +#define MPI_CHECKF(MPI_STATUS, FORMAT, ...) do { \ char resultString[MPI_MAX_ERROR_STRING]; \ int resultLength; \ \ if (MPI_STATUS != MPI_SUCCESS) { \ MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ - fprintf(stdout, "ior ERROR: %s, MPI %s, (%s:%d)\n", \ - MSG, resultString, __FILE__, __LINE__); \ + fprintf(stdout, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ + __VA_ARGS__, resultString, __FILE__, __LINE__); \ fflush(stdout); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } \ } while(0) +/******************************************************************************/ +/* + * MPI_CHECK will display a custom error message as well as an error string + * from the MPI_STATUS and then exit the program + */ + +#define MPI_CHECK(MPI_STATUS, MSG) do { \ + MPI_CHECKF(MPI_STATUS, "%s", MSG); \ +} while(0) + + /******************************************************************************/ /* * System info for Windows. 
diff --git a/src/mdtest-main.c b/src/mdtest-main.c index 854456f6..6dbc7bdf 100644 --- a/src/mdtest-main.c +++ b/src/mdtest-main.c @@ -2,12 +2,9 @@ #include "aiori.h" int main(int argc, char **argv) { - aiori_initialize(); MPI_Init(&argc, &argv); - mdtest_run(argc, argv, MPI_COMM_WORLD, stdout); - MPI_Finalize(); - aiori_finalize(); + return 0; } diff --git a/src/mdtest.c b/src/mdtest.c index 20593d91..105837f3 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -28,7 +28,6 @@ * $Date: 2013/11/27 17:05:31 $ * $Author: brettkettering $ */ - #include #include #include @@ -37,6 +36,7 @@ #include #include #include +#include #include "option.h" #include "utilities.h" @@ -59,6 +59,11 @@ #include #include + +#if HAVE_STRINGS_H +#include +#endif + #include #include #include @@ -71,10 +76,14 @@ #include +#ifdef HAVE_LUSTRE_LUSTREAPI +#include +#endif /* HAVE_LUSTRE_LUSTREAPI */ + #define FILEMODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH #define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH -#define RELEASE_VERS "1.9.3" -#define TEST_DIR "#test-dir" +#define RELEASE_VERS META_VERSION +#define TEST_DIR "test-dir" #define ITEM_COUNT 25000 #define LLU "%lu" @@ -98,6 +107,7 @@ static char unique_rm_dir[MAX_PATHLEN]; static char unique_rm_uni_dir[MAX_PATHLEN]; static char *write_buffer; static char *read_buffer; +static char *verify_read_buffer; static char *stoneWallingStatusFile; @@ -105,6 +115,8 @@ static int barriers; static int create_only; static int stat_only; static int read_only; +static int verify_read; +static int verification_error; static int remove_only; static int leaf_only; static unsigned branch_factor; @@ -126,6 +138,7 @@ static uint64_t items_per_dir; static uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */ static int directory_loops; static int print_time; +static int print_rate_and_time; static int random_seed; static int shared_file; static int files_only; @@ -139,15 +152,19 @@ static size_t 
write_bytes; static int stone_wall_timer_seconds; static size_t read_bytes; static int sync_file; +static int call_sync; static int path_count; static int nstride; /* neighbor stride */ +static int make_node = 0; +#ifdef HAVE_LUSTRE_LUSTREAPI +static int global_dir_layout; +#endif /* HAVE_LUSTRE_LUSTREAPI */ static mdtest_results_t * summary_table; static pid_t pid; static uid_t uid; -/* just use the POSIX backend for now */ -static const char *backend_name = "POSIX"; +/* Use the POSIX backend by default */ static const ior_aiori_t *backend; static IOR_param_t param; @@ -169,18 +186,45 @@ typedef struct{ /* for making/removing unique directory && stating/deleting subdirectory */ enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR}; +/* a helper function for passing debug and verbose messages. + use the MACRO as it will insert __LINE__ for you. + Pass the verbose level for root to print, then the verbose level for anyone to print. + Pass -1 to suppress the print for anyone. + Then do the standard printf stuff. This function adds the newline for you. +*/ +#define VERBOSE(root,any,...) VerboseMessage(root,any,__LINE__,__VA_ARGS__) +void VerboseMessage (int root_level, int any_level, int line, char * format, ...) 
{ + if ((rank==0 && verbose >= root_level) || (any_level > 0 && verbose >= any_level)) { + char buffer[1024]; + va_list args; + va_start (args, format); + vsnprintf (buffer, 1024, format, args); + va_end (args); + if (root_level == 0 && any_level == -1) { + /* No header when it is just the standard output */ + fprintf( out_logfile, "%s\n", buffer ); + } else { + /* add a header when the verbose is greater than 0 */ + fprintf( out_logfile, "V-%d: Rank %3d Line %5d %s\n", root_level, rank, line, buffer ); + } + fflush(out_logfile); + } +} + +void generate_memory_pattern(char * buffer, size_t bytes){ + for(int i=0; i < bytes; i++){ + buffer[i] = i + 1; + } +} void offset_timers(double * t, int tcount) { double toffset; int i; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering offset_timers...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"V-1: Entering offset_timers..." ); - toffset = MPI_Wtime() - t[tcount]; + toffset = GetTimeStamp() - t[tcount]; for (i = 0; i < tcount+1; i++) { t[i] += toffset; } @@ -192,10 +236,7 @@ void parse_dirpath(char *dirpath_arg) { int i = 0; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering parse_dirpath...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1, "Entering parse_dirpath on %s...", dirpath_arg ); tmp = dirpath_arg; @@ -230,17 +271,25 @@ static void prep_testdir(int j, int dir_iter){ pos += sprintf(& testdir[pos], ".%d-%d", j, dir_iter); } +static void phase_end(){ + if (call_sync){ + if(! backend->sync){ + FAIL("Error, backend does not provide the sync method, but you requested to use sync.\n"); + } + backend->sync(& param); + } + + if (barriers) { + MPI_Barrier(testComm); + } +} + /* * This function copies the unique directory name for a given option to * the "to" parameter. Some memory must be allocated to the "to" parameter. 
*/ void unique_dir_access(int opt, char *to) { - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering unique_dir_access...\n" ); - fflush( out_logfile ); - } - if (opt == MK_UNI_DIR) { MPI_Barrier(testComm); sprintf( to, "%s/%s", testdir, unique_chdir_dir ); @@ -253,34 +302,28 @@ void unique_dir_access(int opt, char *to) { } else if (opt == RM_UNI_DIR) { sprintf( to, "%s/%s", testdir, unique_rm_uni_dir ); } + VERBOSE(1,-1,"Entering unique_dir_access, set it to %s", to ); } static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) { char curr_item[MAX_PATHLEN]; const char *operation = create ? "create" : "remove"; - if (( rank == 0 ) && - ( verbose >= 3 ) && - (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { - - fprintf(out_logfile, "V-3: %s dir: "LLU"\n", operation, itemNum); - fflush(out_logfile); + if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { + VERBOSE(3,5,"dir: "LLU"", operation, itemNum); } //create dirs sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? 
mk_name : rm_name, itemNum); - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items_helper (dirs %s): curr_item is \"%s\"\n", operation, curr_item); - fflush(out_logfile); - } + VERBOSE(3,5,"create_remove_items_helper (dirs %s): curr_item is '%s'", operation, curr_item); if (create) { if (backend->mkdir(curr_item, DIRMODE, ¶m) == -1) { - FAIL("unable to create directory"); + FAIL("unable to create directory %s", curr_item); } } else { if (backend->rmdir(curr_item, ¶m) == -1) { - FAIL("unable to remove directory"); + FAIL("unable to remove directory %s", curr_item); } } } @@ -288,20 +331,13 @@ static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) static void remove_file (const char *path, uint64_t itemNum) { char curr_item[MAX_PATHLEN]; - if (( rank == 0 ) && - ( verbose >= 3 ) && - (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { - - fprintf(out_logfile, "V-3: remove file: "LLU"\n", itemNum); - fflush(out_logfile); + if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { + VERBOSE(3,5,"remove file: "LLU"\n", itemNum); } //remove files sprintf(curr_item, "%s/file.%s"LLU"", path, rm_name, itemNum); - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items_helper (non-dirs remove): curr_item is \"%s\"\n", curr_item); - fflush(out_logfile); - } + VERBOSE(3,5,"create_remove_items_helper (non-dirs remove): curr_item is '%s'", curr_item); if (!(shared_file && rank != 0)) { backend->delete (curr_item, ¶m); } @@ -309,59 +345,50 @@ static void remove_file (const char *path, uint64_t itemNum) { static void create_file (const char *path, uint64_t itemNum) { char curr_item[MAX_PATHLEN]; - void *aiori_fh; + void *aiori_fh = NULL; - if (( rank == 0 ) && - ( verbose >= 3 ) && - (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { - - fprintf(out_logfile, "V-3: create file: "LLU"\n", itemNum); - fflush(out_logfile); + if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) { + VERBOSE(3,5,"create file: "LLU"", 
itemNum); } //create files sprintf(curr_item, "%s/file.%s"LLU"", path, mk_name, itemNum); - if ((rank == 0 && verbose >= 3) || verbose >= 5) { - fprintf(out_logfile, "V-3: create_remove_items_helper (non-dirs create): curr_item is \"%s\"\n", curr_item); - fflush(out_logfile); - } + VERBOSE(3,5,"create_remove_items_helper (non-dirs create): curr_item is '%s'", curr_item); - if (collective_creates) { - param.openFlags = IOR_WRONLY; + param.openFlags = IOR_WRONLY; - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items_helper (collective): open...\n" ); - fflush( out_logfile ); - } + if (make_node) { + int ret; + VERBOSE(3,5,"create_remove_items_helper : mknod..." ); + + ret = backend->mknod (curr_item); + if (ret != 0) + FAIL("unable to mknode file %s", curr_item); + + return; + } else if (collective_creates) { + VERBOSE(3,5,"create_remove_items_helper (collective): open..." ); aiori_fh = backend->open (curr_item, ¶m); - if (NULL == aiori_fh) { - FAIL("unable to open file"); - } + if (NULL == aiori_fh) + FAIL("unable to open file %s", curr_item); /* * !collective_creates */ } else { - param.openFlags = IOR_CREAT | IOR_WRONLY; + param.openFlags |= IOR_CREAT; param.filePerProc = !shared_file; - - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items_helper (non-collective, shared): open...\n" ); - fflush( out_logfile ); - } + param.mode = FILEMODE; + VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." ); aiori_fh = backend->create (curr_item, ¶m); - if (NULL == aiori_fh) { - FAIL("unable to create file"); - } + if (NULL == aiori_fh) + FAIL("unable to create file %s", curr_item); } if (write_bytes > 0) { - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items_helper: write...\n" ); - fflush( out_logfile ); - } + VERBOSE(3,5,"create_remove_items_helper: write..." 
); /* * According to Bill Loewe, writes are only done one time, so they are always at @@ -370,15 +397,11 @@ static void create_file (const char *path, uint64_t itemNum) { param.offset = 0; param.fsyncPerWrite = sync_file; if ( write_bytes != (size_t) backend->xfer (WRITE, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, ¶m)) { - FAIL("unable to write file"); + FAIL("unable to write file %s", curr_item); } } - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items_helper: close...\n" ); - fflush( out_logfile ); - } - + VERBOSE(3,5,"create_remove_items_helper: close..." ); backend->close (aiori_fh, ¶m); } @@ -386,10 +409,7 @@ static void create_file (const char *path, uint64_t itemNum) { void create_remove_items_helper(const int dirs, const int create, const char *path, uint64_t itemNum, rank_progress_t * progress) { - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering create_remove_items_helper...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering create_remove_items_helper on %s", path ); for (uint64_t i = progress->items_start; i < progress->items_per_dir ; ++i) { if (!dirs) { @@ -415,10 +435,7 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa void collective_helper(const int dirs, const int create, const char* path, uint64_t itemNum, rank_progress_t * progress) { char curr_item[MAX_PATHLEN]; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering collective_helper...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering collective_helper on %s", path ); for (uint64_t i = progress->items_start ; i < progress->items_per_dir ; ++i) { if (dirs) { create_remove_dirs (path, create, itemNum + i); @@ -426,19 +443,17 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 } sprintf(curr_item, "%s/file.%s"LLU"", path, create ? 
mk_name : rm_name, itemNum+i); - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create file: %s\n", curr_item); - fflush(out_logfile); - } + VERBOSE(3,5,"create file: %s", curr_item); if (create) { void *aiori_fh; //create files param.openFlags = IOR_WRONLY | IOR_CREAT; + param.mode = FILEMODE; aiori_fh = backend->create (curr_item, ¶m); if (NULL == aiori_fh) { - FAIL("unable to create file"); + FAIL("unable to create file %s", curr_item); } backend->close (aiori_fh, ¶m); @@ -463,19 +478,13 @@ void create_remove_items(int currDepth, const int dirs, const int create, const unsigned long long currDir = dirNum; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering create_remove_items, currDepth = %d...\n", currDepth ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering create_remove_items on %s, currDepth = %d...", path, currDepth ); memset(dir, 0, MAX_PATHLEN); strcpy(temp_path, path); - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items (start): temp_path is \"%s\"\n", temp_path ); - fflush(out_logfile); - } + VERBOSE(3,5,"create_remove_items (start): temp_path is '%s'", temp_path ); if (currDepth == 0) { /* create items at this depth */ @@ -501,10 +510,7 @@ void create_remove_items(int currDepth, const int dirs, const int create, const strcat(temp_path, "/"); strcat(temp_path, dir); - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: create_remove_items (for loop): temp_path is \"%s\"\n", temp_path ); - fflush(out_logfile); - } + VERBOSE(3,5,"create_remove_items (for loop): temp_path is '%s'", temp_path ); /* create the items in this branch */ if (!leaf_only || (leaf_only && currDepth == depth)) { @@ -540,10 +546,7 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch uint64_t parent_dir, item_num = 0; char item[MAX_PATHLEN], temp[MAX_PATHLEN]; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering mdtest_stat...\n" ); - 
fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering mdtest_stat on %s", path ); uint64_t stop_items = items; @@ -579,15 +582,13 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch /* create name of file/dir to stat */ if (dirs) { - if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) { - fprintf(out_logfile, "V-3: stat dir: "LLU"\n", i); - fflush(out_logfile); + if ( (i % ITEM_COUNT == 0) && (i != 0)) { + VERBOSE(3,5,"stat dir: "LLU"", i); } sprintf(item, "dir.%s"LLU"", stat_name, item_num); } else { - if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) { - fprintf(out_logfile, "V-3: stat file: "LLU"\n", i); - fflush(out_logfile); + if ( (i % ITEM_COUNT == 0) && (i != 0)) { + VERBOSE(3,5,"stat file: "LLU"", i); } sprintf(item, "file.%s"LLU"", stat_name, item_num); } @@ -614,29 +615,9 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch strcpy( item, temp ); /* below temp used to be hiername */ - if (rank == 0 && verbose >= 3) { - if (dirs) { - fprintf(out_logfile, "V-3: mdtest_stat dir : %s\n", item); - } else { - fprintf(out_logfile, "V-3: mdtest_stat file: %s\n", item); - } - fflush(out_logfile); - } - + VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item); if (-1 == backend->stat (item, &buf, ¶m)) { - if (dirs) { - if ( verbose >= 3 ) { - fprintf( out_logfile, "V-3: Stat'ing directory \"%s\"\n", item ); - fflush( out_logfile ); - } - FAIL("unable to stat directory"); - } else { - if ( verbose >= 3 ) { - fprintf( out_logfile, "V-3: Stat'ing file \"%s\"\n", item ); - fflush( out_logfile ); - } - FAIL("unable to stat file"); - } + FAIL("unable to stat %s %s", dirs ? 
"directory" : "file", item); } } } @@ -648,17 +629,22 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { char item[MAX_PATHLEN], temp[MAX_PATHLEN]; void *aiori_fh; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering mdtest_read...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering mdtest_read on %s", path ); /* allocate read buffer */ if (read_bytes > 0) { - read_buffer = (char *)malloc(read_bytes); - if (read_buffer == NULL) { + int alloc_res = posix_memalign((void**)&read_buffer, sysconf(_SC_PAGESIZE), read_bytes); + if (alloc_res) { FAIL("out of memory"); } + + if (verify_read > 0) { + verify_read_buffer = (char *)malloc(read_bytes); + if (verify_read_buffer == NULL) { + FAIL("out of memory"); + } + generate_memory_pattern(verify_read_buffer, read_bytes); + } } uint64_t stop_items = items; @@ -696,9 +682,8 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { /* create name of file to read */ if (!dirs) { - if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) { - fprintf(out_logfile, "V-3: read file: "LLU"\n", i); - fflush(out_logfile); + if ((i%ITEM_COUNT == 0) && (i != 0)) { + VERBOSE(3,5,"read file: "LLU"", i); } sprintf(item, "file.%s"LLU"", read_name, item_num); } @@ -725,24 +710,26 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { strcpy( item, temp ); /* below temp used to be hiername */ - if (rank == 0 && verbose >= 3) { - if (!dirs) { - fprintf(out_logfile, "V-3: mdtest_read file: %s\n", item); - } - fflush(out_logfile); - } + VERBOSE(3,5,"mdtest_read file: %s", item); /* open file for reading */ param.openFlags = O_RDONLY; aiori_fh = backend->open (item, ¶m); if (NULL == aiori_fh) { - FAIL("unable to open file"); + FAIL("unable to open file %s", item); } /* read file */ if (read_bytes > 0) { + read_buffer[0] = 42; /* use a random value to ensure that the read_buffer is now different from the expected buffer and read isn't 
sometimes NOOP */ if (read_bytes != (size_t) backend->xfer (READ, aiori_fh, (IOR_size_t *) read_buffer, read_bytes, ¶m)) { - FAIL("unable to read file"); + FAIL("unable to read file %s", item); + } + if(verify_read){ + if (memcmp(read_buffer, verify_read_buffer, read_bytes) != 0){ + VERBOSE(2, -1, "Error verifying %s", item); + verification_error++; + } } } @@ -756,10 +743,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { void collective_create_remove(const int create, const int dirs, const int ntasks, const char *path, rank_progress_t * progress) { char temp[MAX_PATHLEN]; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering collective_create_remove...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering collective_create_remove on %s", path ); /* rank 0 does all of the creates and removes for all of the ranks */ for (int i = 0 ; i < ntasks ; ++i) { @@ -787,6 +771,7 @@ void collective_create_remove(const int create, const int dirs, const int ntasks sprintf(rm_name, "mdtest.%d.", (i+(3*nstride))%ntasks); } if (unique_dir_per_task) { + VERBOSE(3,5,"i %d nstride %d ntasks %d", i, nstride, ntasks); sprintf(unique_mk_dir, "%s/mdtest_tree.%d.0", testdir, (i+(0*nstride))%ntasks); sprintf(unique_chdir_dir, "%s/mdtest_tree.%d.0", testdir, @@ -801,10 +786,7 @@ void collective_create_remove(const int create, const int dirs, const int ntasks } /* Now that everything is set up as it should be, do the create or remove */ - if (rank == 0 && verbose >= 3) { - fprintf(out_logfile, "V-3: collective_create_remove (create_remove_items): temp is \"%s\"\n", temp); - fflush( out_logfile ); - } + VERBOSE(3,5,"collective_create_remove (create_remove_items): temp is '%s'", temp); create_remove_items(0, dirs, create, 1, temp, 0, progress); } @@ -843,13 +825,10 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran MPI_Comm_size(testComm, &size); - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( 
out_logfile, "V-1: Entering directory_test...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering directory_test on %s", path ); MPI_Barrier(testComm); - t[0] = MPI_Wtime(); + t[0] = GetTimeStamp(); /* create phase */ if(create_only) { @@ -864,10 +843,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: directory_test: create path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"directory_test: create path is '%s'", temp_path ); /* "touch" the files */ if (collective_creates) { @@ -881,10 +857,8 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } - if (barriers) { - MPI_Barrier(testComm); - } - t[1] = MPI_Wtime(); + phase_end(); + t[1] = GetTimeStamp(); /* stat phase */ if (stat_only) { @@ -899,10 +873,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: directory_test: stat path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"stat path is '%s'", temp_path ); /* stat directories */ if (random_seed > 0) { @@ -912,11 +883,8 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } } - - if (barriers) { - MPI_Barrier(testComm); - } - t[2] = MPI_Wtime(); + phase_end(); + t[2] = GetTimeStamp(); /* read phase */ if (read_only) { @@ -931,10 +899,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: directory_test: read path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"directory_test: read path is '%s'", temp_path ); /* read directories */ if (random_seed > 0) { @@ -945,10 +910,8 @@ void 
directory_test(const int iteration, const int ntasks, const char *path, ran } } - if (barriers) { - MPI_Barrier(testComm); - } - t[3] = MPI_Wtime(); + phase_end(); + t[3] = GetTimeStamp(); if (remove_only) { for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ @@ -962,10 +925,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: directory_test: remove directories path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"directory_test: remove directories path is '%s'", temp_path ); /* remove directories */ if (collective_creates) { @@ -978,10 +938,8 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } - if (barriers) { - MPI_Barrier(testComm); - } - t[4] = MPI_Wtime(); + phase_end(); + t[4] = GetTimeStamp(); if (remove_only) { if (unique_dir_per_task) { @@ -990,10 +948,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: directory_test: remove unique directories path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"directory_test: remove unique directories path is '%s'\n", temp_path ); } if (unique_dir_per_task && !time_unique_dir_overhead) { @@ -1026,46 +981,34 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran summary_table[iteration].stonewall_last_item[3] = items; } - if (verbose >= 1 && rank == 0) { - fprintf(out_logfile, "V-1: Directory creation: %14.3f sec, %14.3f ops/sec\n", - t[1] - t[0], summary_table[iteration].rate[0]); - fprintf(out_logfile, "V-1: Directory stat : %14.3f sec, %14.3f ops/sec\n", - t[2] - t[1], summary_table[iteration].rate[1]); -/* N/A - fprintf(out_logfile, "V-1: Directory read : %14.3f sec, %14.3f ops/sec\n", - t[3] - t[2], 
summary_table[iteration].rate[2]); -*/ - fprintf(out_logfile, "V-1: Directory removal : %14.3f sec, %14.3f ops/sec\n", - t[4] - t[3], summary_table[iteration].rate[3]); - fflush(out_logfile); - } + VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[0]); + VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[1]); + /* N/A + VERBOSE(1,-1," Directory read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[2]); + */ + VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[3]); } /* Returns if the stonewall was hit */ int updateStoneWallIterations(int iteration, rank_progress_t * progress, double tstart){ int hit = 0; - if (verbose >= 1 ) { - fprintf( out_logfile, "V-1: rank %d stonewall hit with %lld items\n", rank, (long long) progress->items_done ); - fflush( out_logfile ); - } uint64_t done = progress->items_done; long long unsigned max_iter = 0; + + VERBOSE(1,1,"stonewall hit with %lld items", (long long) progress->items_done ); MPI_Allreduce(& progress->items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); - summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = MPI_Wtime() - tstart; + summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; // continue to the maximum... 
long long min_accessed = 0; MPI_Reduce(& progress->items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); long long sum_accessed = 0; MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); + summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; + summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; if(items != (sum_accessed / size)){ - summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; - summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; - if (rank == 0){ - fprintf( out_logfile, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); - fflush( out_logfile ); - } + VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); hit = 1; } progress->items_start = done; @@ -1080,13 +1023,10 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro char temp_path[MAX_PATHLEN]; MPI_Comm_size(testComm, &size); - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering file_test...\n" ); - fflush( out_logfile ); - } + VERBOSE(3,5,"Entering file_test on %s", path); MPI_Barrier(testComm); - t[0] = MPI_Wtime(); + t[0] = GetTimeStamp(); /* create phase */ if (create_only ) { @@ -1095,6 +1035,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (unique_dir_per_task) { unique_dir_access(MK_UNI_DIR, temp_path); + VERBOSE(5,5,"operating on %s", temp_path); if (!time_unique_dir_overhead) { offset_timers(t, 0); } @@ -1102,10 +1043,9 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: file_test: create path is \"%s\"\n", temp_path ); - 
fflush( out_logfile ); - } + + + VERBOSE(3,-1,"file_test: create path is '%s'", temp_path ); /* "touch" the files */ if (collective_creates) { @@ -1122,9 +1062,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (hit){ progress->stone_wall_timer_seconds = 0; - if (verbose > 1){ - printf("stonewall rank %d: %lld of %lld \n", rank, (long long) progress->items_start, (long long) progress->items_per_dir); - } + VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir); create_remove_items(0, 0, 1, 0, temp_path, 0, progress); // now reset the values progress->stone_wall_timer_seconds = stone_wall_timer_seconds; @@ -1149,17 +1087,15 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (rank == 0) { if(expected_items == -1){ fprintf(out_logfile, "WARNING: could not read stonewall status file\n"); - }else if(verbose >= 1){ - fprintf(out_logfile, "Read stonewall status; items: "LLU"\n", items); + }else { + VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", items); } } } } - if (barriers) { - MPI_Barrier(testComm); - } - t[1] = MPI_Wtime(); + phase_end(); + t[1] = GetTimeStamp(); /* stat phase */ if (stat_only ) { @@ -1174,20 +1110,15 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: file_test: stat path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"file_test: stat path is '%s'", temp_path ); /* stat files */ mdtest_stat((random_seed > 0 ? 
1 : 0), 0, dir_iter, temp_path, progress); } } - if (barriers) { - MPI_Barrier(testComm); - } - t[2] = MPI_Wtime(); + phase_end(); + t[2] = GetTimeStamp(); /* read phase */ if (read_only ) { @@ -1202,10 +1133,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: file_test: read path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"file_test: read path is '%s'", temp_path ); /* read files */ if (random_seed > 0) { @@ -1216,10 +1144,8 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } - if (barriers) { - MPI_Barrier(testComm); - } - t[3] = MPI_Wtime(); + phase_end(); + t[3] = GetTimeStamp(); if (remove_only) { progress->items_start = 0; @@ -1235,25 +1161,21 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro sprintf( temp_path, "%s/%s", testdir, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: file_test: rm directories path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path ); if (collective_creates) { if (rank == 0) { collective_create_remove(0, 0, ntasks, temp_path, progress); } } else { + VERBOSE(3,5,"gonna create %s", temp_path); create_remove_items(0, 0, 0, 0, temp_path, 0, progress); } } } - if (barriers) { - MPI_Barrier(testComm); - } - t[4] = MPI_Wtime(); + phase_end(); + t[4] = GetTimeStamp(); if (remove_only) { if (unique_dir_per_task) { unique_dir_access(RM_UNI_DIR, temp_path); @@ -1261,10 +1183,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro strcpy( temp_path, path ); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: file_test: rm unique directories path is \"%s\"\n", temp_path ); - fflush( out_logfile ); - } + VERBOSE(3,5,"file_test: rm unique directories path is '%s'", 
temp_path ); } if (unique_dir_per_task && !time_unique_dir_overhead) { @@ -1301,78 +1220,16 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro summary_table[iteration].stonewall_last_item[7] = items; } - if (verbose >= 1 && rank == 0) { - fprintf(out_logfile, "V-1: File creation : %14.3f sec, %14.3f ops/sec\n", - t[1] - t[0], summary_table[iteration].rate[4]); - fprintf(out_logfile, "V-1: File stat : %14.3f sec, %14.3f ops/sec\n", - t[2] - t[1], summary_table[iteration].rate[5]); - fprintf(out_logfile, "V-1: File read : %14.3f sec, %14.3f ops/sec\n", - t[3] - t[2], summary_table[iteration].rate[6]); - fprintf(out_logfile, "V-1: File removal : %14.3f sec, %14.3f ops/sec\n", - t[4] - t[3], summary_table[iteration].rate[7]); - fflush(out_logfile); + VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[4]); + if(summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ + VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); } + VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[5]); + VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[6]); + VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[7]); } -void print_help (void) { - int j; - - char APIs[1024]; - aiori_supported_apis(APIs); - char apiStr[1024]; - sprintf(apiStr, "API for I/O [%s]", APIs); - - fprintf(out_logfile, - "Usage: mdtest [-b branching_factor] [-B] [-c] [-C] [-d testdir] [-D] [-e number_of_bytes_to_read]\n" - " [-E] [-f first] [-F] [-h] [-i iterations] [-I items_per_dir] [-l last] [-L]\n" - " [-n number_of_items] [-N stride_length] [-p seconds] [-r]\n" - " [-R[seed]] [-s stride] [-S] [-t] [-T] [-u] [-v] [-a API]\n" - " 
[-V verbosity_value] [-w number_of_bytes_to_write] [-W seconds] [-y] [-z depth] -Z\n" - "\t-a: %s\n" - "\t-b: branching factor of hierarchical directory structure\n" - "\t-B: no barriers between phases\n" - "\t-c: collective creates: task 0 does all creates\n" - "\t-C: only create files/dirs\n" - "\t-d: the directory in which the tests will run\n" - "\t-D: perform test on directories only (no files)\n" - "\t-e: bytes to read from each file\n" - "\t-E: only read files/dir\n" - "\t-f: first number of tasks on which the test will run\n" - "\t-F: perform test on files only (no directories)\n" - "\t-h: prints this help message\n" - "\t-i: number of iterations the test will run\n" - "\t-I: number of items per directory in tree\n" - "\t-l: last number of tasks on which the test will run\n" - "\t-L: files only at leaf level of tree\n" - "\t-n: every process will creat/stat/read/remove # directories and files\n" - "\t-N: stride # between neighbor tasks for file/dir operation (local=0)\n" - "\t-p: pre-iteration delay (in seconds)\n" - "\t-r: only remove files or directories left behind by previous runs\n" - "\t-R: randomly stat files (optional argument for random seed)\n" - "\t-s: stride between the number of tasks for each test\n" - "\t-S: shared file access (file only, no directories)\n" - "\t-t: time unique working directory overhead\n" - "\t-T: only stat files/dirs\n" - "\t-u: unique working directory for each task\n" - "\t-v: verbosity (each instance of option increments by one)\n" - "\t-V: verbosity value\n" - "\t-w: bytes to write to each file after it is created\n" - "\t-W: number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase)\n" - "\t-x: StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs\n" - "\t-y: sync file after writing\n" - "\t-z: depth of hierarchical directory structure\n" - "\t-Z: print 
time instead of rate\n", - apiStr - ); - - MPI_Initialized(&j); - if (j) { - MPI_Finalize(); - } - exit(0); -} - -void summarize_results(int iterations) { +void summarize_results(int iterations, int print_time) { char access[MAX_PATHLEN]; int i, j, k; int start, stop, tableSize = MDTEST_LAST_NUM; @@ -1381,10 +1238,7 @@ void summarize_results(int iterations) { double all[iterations * size * tableSize]; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering summarize_results...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering summarize_results..." ); MPI_Barrier(testComm); for(int i=0; i < iterations; i++){ @@ -1395,187 +1249,137 @@ void summarize_results(int iterations) { } } - if (rank == 0) { - - fprintf(out_logfile, "\nSUMMARY %s: (of %d iterations)\n", print_time ? "time": "rate", iterations); - fprintf(out_logfile, - " Operation Max Min Mean Std Dev\n"); - fprintf(out_logfile, - " --------- --- --- ---- -------\n"); - fflush(out_logfile); - - /* if files only access, skip entries 0-3 (the dir tests) */ - if (files_only && !dirs_only) { - start = 4; - } else { - start = 0; - } - - /* if directories only access, skip entries 4-7 (the file tests) */ - if (dirs_only && !files_only) { - stop = 4; - } else { - stop = 8; - } + if (rank != 0) { + return; + } - /* special case: if no directory or file tests, skip all */ - if (!dirs_only && !files_only) { - start = stop = 0; - } + VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? 
"time": "rate", iterations); + VERBOSE(0,-1," Operation Max Min Mean Std Dev"); + VERBOSE(0,-1," --------- --- --- ---- -------"); - /* calculate aggregates */ - if (barriers) { - double maxes[iterations]; + /* if files only access, skip entries 0-3 (the dir tests) */ + if (files_only && !dirs_only) { + start = 4; + } else { + start = 0; + } + /* if directories only access, skip entries 4-7 (the file tests) */ + if (dirs_only && !files_only) { + stop = 4; + } else { + stop = 8; + } - /* Because each proc times itself, in the case of barriers we - * have to backwards calculate the time to simulate the use - * of barriers. - */ - for (i = start; i < stop; i++) { - for (j=0; j maxes[j]) { - min = maxes[j]; + for (i = start; i < stop; i++) { + min = max = all[i]; + for (k=0; k < size; k++) { + for (j = 0; j < iterations; j++) { + curr = all[(k*tableSize*iterations) + + (j*tableSize) + i]; + if (min > curr) { + min = curr; } - if (max < maxes[j]) { - max = maxes[j]; + if (max < curr) { + max = curr; } - sum += maxes[j]; + sum += curr; } - mean = sum / iterations; - for (j=0; j curr) { - min = curr; - } - if (max < curr) { - max = curr; - } - sum += curr; - } - } - mean = sum / (iterations * size); - for (k=0; k curr) { - min = curr; - } - if (max < curr) { - max = curr; - } - sum += curr; + // TODO generalize once more stonewall timers are supported + double stonewall_time = 0; + uint64_t stonewall_items = 0; + for(int i=0; i < iterations; i++){ + if(summary_table[i].stonewall_time[MDTEST_FILE_CREATE_NUM]){ + stonewall_time += summary_table[i].stonewall_time[MDTEST_FILE_CREATE_NUM]; + stonewall_items += summary_table[i].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]; + } + } + if(stonewall_items != 0){ + fprintf(out_logfile, " File create (stonewall) : "); + fprintf(out_logfile, "%14s %14s %14.3f %14s\n", "NA", "NA", print_time ? 
stonewall_time : stonewall_items / stonewall_time, "NA"); + } + + /* calculate tree create/remove rates */ + for (i = 8; i < tableSize; i++) { + min = max = all[i]; + for (j = 0; j < iterations; j++) { + if(print_time){ + curr = summary_table[j].time[i]; + }else{ + curr = summary_table[j].rate[i]; } - mean = sum / (iterations); - for (j = 0; j < iterations; j++) { - if(print_time){ - curr = summary_table[j].time[i]; - }else{ - curr = summary_table[j].rate[i]; - } - var += pow((mean - curr), 2); + if (min > curr) { + min = curr; } - var = var / (iterations); - sd = sqrt(var); - switch (i) { - case 8: strcpy(access, "Tree creation :"); break; - case 9: strcpy(access, "Tree removal :"); break; - default: strcpy(access, "ERR"); break; + if (max < curr) { + max = curr; } - fprintf(out_logfile, " %s ", access); - fprintf(out_logfile, "%14.3f ", max); - fprintf(out_logfile, "%14.3f ", min); - fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%14.3f\n", sd); - fflush(out_logfile); - sum = var = 0; + sum += curr; + } + mean = sum / (iterations); + for (j = 0; j < iterations; j++) { + if(print_time){ + curr = summary_table[j].time[i]; + }else{ + curr = summary_table[j].rate[i]; + } + + var += pow((mean - curr), 2); + } + var = var / (iterations); + sd = sqrt(var); + switch (i) { + case 8: strcpy(access, "Tree creation :"); break; + case 9: strcpy(access, "Tree removal :"); break; + default: strcpy(access, "ERR"); break; } + fprintf(out_logfile, " %s ", access); + fprintf(out_logfile, "%14.3f ", max); + fprintf(out_logfile, "%14.3f ", min); + fprintf(out_logfile, "%14.3f ", mean); + fprintf(out_logfile, "%14.3f\n", sd); + fflush(out_logfile); + sum = var = 0; } } @@ -1583,22 +1387,15 @@ void summarize_results(int iterations) { void valid_tests() { if (((stone_wall_timer_seconds > 0) && (branch_factor > 1)) || ! 
barriers) { - fprintf(out_logfile, "Error, stone wall timer does only work with a branch factor <= 1 and with barriers\n"); - MPI_Abort(testComm, 1); + FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", branch_factor); } if (!create_only && !stat_only && !read_only && !remove_only) { create_only = stat_only = read_only = remove_only = 1; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: main: Setting create/stat/read/remove_only to True\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" ); } - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering valid_tests...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering valid_tests..." ); /* if dirs_only and files_only were both left unset, set both now */ if (!dirs_only && !files_only) { @@ -1630,10 +1427,6 @@ void valid_tests() { FAIL("-c not compatible with -B"); } - if ( strcasecmp(backend_name, "POSIX") != 0 && strcasecmp(backend_name, "DUMMY") != 0) { - FAIL("-a only supported interface is POSIX (and DUMMY) right now!"); - } - /* check for shared file incompatibilities */ if (unique_dir_per_task && shared_file && rank == 0) { FAIL("-u not compatible with -S"); @@ -1672,7 +1465,10 @@ void valid_tests() { FAIL("items + items_per_dir can only be set without stonewalling"); } } - + /* check for using mknod */ + if (write_bytes > 0 && make_node) { + FAIL("-k not compatible with -w"); + } } void show_file_system_size(char *file_system) { @@ -1691,14 +1487,11 @@ void show_file_system_size(char *file_system) { ior_aiori_statfs_t stat_buf; int ret; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering show_file_system_size...\n" ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"Entering show_file_system_size on %s", file_system ); ret = backend->statfs (file_system, &stat_buf, ¶m); if (0 != ret) { - FAIL("unable to stat file system"); 
+ FAIL("unable to stat file system %s", file_system); } total_file_system_size = stat_buf.f_blocks * stat_buf.f_bsize; @@ -1721,19 +1514,15 @@ void show_file_system_size(char *file_system) { * 100; if (realpath(file_system, real_path) == NULL) { - FAIL("unable to use realpath()"); + WARN("unable to use realpath() on file system"); } /* show results */ - fprintf(out_logfile, "Path: %s\n", real_path); - fprintf(out_logfile, "FS: %.1f %s Used FS: %2.1f%% ", - total_file_system_size_hr, file_system_unit_str, - used_file_system_percentage); - fprintf(out_logfile, "Inodes: %.1f %s Used Inodes: %2.1f%%\n", - (double)total_inodes / (double)inode_unit_val, - inode_unit_str, used_inode_percentage); - fflush(out_logfile); + VERBOSE(0,-1,"Path: %s", real_path); + VERBOSE(0,-1,"FS: %.1f %s Used FS: %2.1f%% Inodes: %.1f %s Used Inodes: %2.1f%%\n", + total_file_system_size_hr, file_system_unit_str, used_file_system_percentage, + (double)total_inodes / (double)inode_unit_val, inode_unit_str, used_inode_percentage); return; } @@ -1745,15 +1534,7 @@ void display_freespace(char *testdirpath) int directoryFound = 0; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering display_freespace...\n" ); - fflush( out_logfile ); - } - - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: testdirpath is \"%s\"\n", testdirpath ); - fflush( out_logfile ); - } + VERBOSE(3,5,"Entering display_freespace on %s...", testdirpath ); strcpy(dirpath, testdirpath); @@ -1772,17 +1553,12 @@ void display_freespace(char *testdirpath) strcpy(dirpath, "."); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: Before show_file_system_size, dirpath is \"%s\"\n", dirpath ); - fflush( out_logfile ); - } + if (param.api && strcasecmp(param.api, "DFS") == 0) + return; + VERBOSE(3,5,"Before show_file_system_size, dirpath is '%s'", dirpath ); show_file_system_size(dirpath); - - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: After show_file_system_size, 
dirpath is \"%s\"\n", dirpath ); - fflush( out_logfile ); - } + VERBOSE(3,5, "After show_file_system_size, dirpath is '%s'\n", dirpath ); return; } @@ -1794,35 +1570,33 @@ void create_remove_directory_tree(int create, char dir[MAX_PATHLEN]; - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering create_remove_directory_tree, currDepth = %d...\n", currDepth ); - fflush( out_logfile ); - } + VERBOSE(1,5,"Entering create_remove_directory_tree on %s, currDepth = %d...", path, currDepth ); if (currDepth == 0) { sprintf(dir, "%s/%s.%d/", path, base_tree_name, dirNum); if (create) { - if (rank == 0 && verbose >= 2) { - fprintf(out_logfile, "V-2: Making directory \"%s\"\n", dir); - fflush(out_logfile); - } - + VERBOSE(2,5,"Making directory '%s'", dir); if (-1 == backend->mkdir (dir, DIRMODE, ¶m)) { - fprintf(out_logfile, "error could not create directory \"%s\"\n", dir); + fprintf(out_logfile, "error could not create directory '%s'\n", dir); } +#ifdef HAVE_LUSTRE_LUSTREAPI + /* internal node for branching, can be non-striped for children */ + if (global_dir_layout && \ + llapi_dir_set_default_lmv_stripe(dir, -1, 0, + LMV_HASH_TYPE_FNV_1A_64, + NULL) == -1) { + FAIL("Unable to reset to global default directory layout"); + } +#endif /* HAVE_LUSTRE_LUSTREAPI */ } create_remove_directory_tree(create, ++currDepth, dir, ++dirNum, progress); if (!create) { - if (rank == 0 && verbose >= 2) { - fprintf(out_logfile, "V-2: Remove directory \"%s\"\n", dir); - fflush(out_logfile); - } - + VERBOSE(2,5,"Remove directory '%s'", dir); if (-1 == backend->rmdir(dir, ¶m)) { - FAIL("Unable to remove directory"); + FAIL("Unable to remove directory %s", dir); } } } else if (currDepth <= depth) { @@ -1836,13 +1610,9 @@ void create_remove_directory_tree(int create, strcat(temp_path, dir); if (create) { - if (rank == 0 && verbose >= 2) { - fprintf(out_logfile, "V-2: Making directory \"%s\"\n", temp_path); - fflush(out_logfile); - } - + VERBOSE(2,5,"Making directory '%s'", 
temp_path); if (-1 == backend->mkdir(temp_path, DIRMODE, ¶m)) { - FAIL("Unable to create directory"); + FAIL("Unable to create directory %s", temp_path); } } @@ -1851,13 +1621,9 @@ void create_remove_directory_tree(int create, currDepth--; if (!create) { - if (rank == 0 && verbose >= 2) { - fprintf(out_logfile, "V-2: Remove directory \"%s\"\n", temp_path); - fflush(out_logfile); - } - + VERBOSE(2,5,"Remove directory '%s'", temp_path); if (-1 == backend->rmdir(temp_path, ¶m)) { - FAIL("Unable to remove directory"); + FAIL("Unable to remove directory %s", temp_path); } } @@ -1879,22 +1645,22 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t double startCreate, endCreate; int k; - if (rank == 0 && verbose >= 1) { - fprintf(out_logfile, "V-1: main: * iteration %d *\n", j+1); - fflush(out_logfile); - } + VERBOSE(1,-1,"main: * iteration %d *", j+1); for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); - if (verbose >= 2 && rank == 0) { - fprintf(out_logfile, "V-2: main (for j loop): making testdir, \"%s\"\n", testdir ); - fflush( out_logfile ); - } + VERBOSE(2,5,"main (for j loop): making testdir, '%s'", testdir ); if ((rank < path_count) && backend->access(testdir, F_OK, ¶m) != 0) { if (backend->mkdir(testdir, DIRMODE, ¶m) != 0) { - FAIL("Unable to create test directory"); + FAIL("Unable to create test directory %s", testdir); } +#ifdef HAVE_LUSTRE_LUSTREAPI + /* internal node for branching, can be non-striped for children */ + if (global_dir_layout && unique_dir_per_task && llapi_dir_set_default_lmv_stripe(testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { + FAIL("Unable to reset to global default directory layout"); + } +#endif /* HAVE_LUSTRE_LUSTREAPI */ } } @@ -1902,7 +1668,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t /* create hierarchical directory structure */ MPI_Barrier(testComm); - startCreate = MPI_Wtime(); + startCreate = GetTimeStamp(); for 
(int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); @@ -1915,13 +1681,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t for (k=0; k= 3 && rank == 0) { - fprintf(out_logfile, - "V-3: main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with \"%s\"\n", - testdir ); - fflush( out_logfile ); - } - + VERBOSE(3,5,"main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", testdir ); /* * Let's pass in the path to the directory we most recently made so that we can use * full paths in the other calls. @@ -1933,13 +1693,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } } } else if (!collective_creates) { - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, - "V-3: main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with \"%s\"\n", - testdir ); - fflush( out_logfile ); - } - + VERBOSE(3,5,"main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '%s'", testdir ); /* * Let's pass in the path to the directory we most recently made so that we can use * full paths in the other calls. 
@@ -1948,12 +1702,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } } else { if (rank == 0) { - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, - "V-3: main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with \"%s\"\n", - testdir ); - fflush( out_logfile ); - } + VERBOSE(3,5,"main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '%s'", testdir ); /* * Let's pass in the path to the directory we most recently made so that we can use @@ -1964,17 +1713,13 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } } MPI_Barrier(testComm); - endCreate = MPI_Wtime(); + endCreate = GetTimeStamp(); summary_table->rate[8] = num_dirs_in_tree / (endCreate - startCreate); summary_table->time[8] = (endCreate - startCreate); summary_table->items[8] = num_dirs_in_tree; summary_table->stonewall_last_item[8] = num_dirs_in_tree; - if (verbose >= 1 && rank == 0) { - fprintf(out_logfile, "V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec\n", - (endCreate - startCreate), summary_table->rate[8]); - fflush(out_logfile); - } + VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[8]); } sprintf(unique_mk_dir, "%s.0", base_tree_name); sprintf(unique_chdir_dir, "%s.0", base_tree_name); @@ -1984,10 +1729,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t unique_rm_uni_dir[0] = 0; if (!unique_dir_per_task) { - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: main: Using unique_mk_dir, \"%s\"\n", unique_mk_dir ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", unique_mk_dir ); } if (rank < i) { @@ -1998,18 +1740,17 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t sprintf(rm_name, "mdtest.%d.", (rank+(3*nstride))%i); } if (unique_dir_per_task) 
{ + VERBOSE(3,5,"i %d nstride %d", i, nstride); sprintf(unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*nstride))%i); sprintf(unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*nstride))%i); sprintf(unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*nstride))%i); sprintf(unique_read_dir, "mdtest_tree.%d.0", (rank+(3*nstride))%i); sprintf(unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*nstride))%i); unique_rm_uni_dir[0] = 0; + VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", unique_mk_dir,unique_chdir_dir,unique_stat_dir,unique_read_dir,unique_rm_dir); } - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: main: Copied unique_mk_dir, \"%s\", to topdir\n", unique_mk_dir ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", unique_mk_dir ); if (dirs_only && !shared_file) { if (pre_delay) { @@ -2021,22 +1762,20 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t if (pre_delay) { DelaySecs(pre_delay); } + VERBOSE(3,5,"will file_test on %s", unique_mk_dir); file_test(j, i, unique_mk_dir, progress); } } /* remove directory structure */ if (!unique_dir_per_task) { - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: main: Using testdir, \"%s\"\n", testdir ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"main: Using testdir, '%s'", testdir ); } MPI_Barrier(testComm); if (remove_only) { progress->items_start = 0; - startCreate = MPI_Wtime(); + startCreate = GetTimeStamp(); for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); if (unique_dir_per_task) { @@ -2048,12 +1787,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t for (k=0; k= 3 && rank == 0) { - fprintf(out_logfile, - "V-3: main (remove hierarchical directory loop-collective): Calling create_remove_directory_tree with \"%s\"\n", - testdir ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"main (remove hierarchical directory loop-collective): Calling 
create_remove_directory_tree with '%s'", testdir ); /* * Let's pass in the path to the directory we most recently made so that we can use @@ -2066,12 +1800,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } } } else if (!collective_creates) { - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, - "V-3: main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with \"%s\"\n", - testdir ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '%s'", testdir ); /* * Let's pass in the path to the directory we most recently made so that we can use @@ -2081,12 +1810,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } } else { if (rank == 0) { - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, - "V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with \"%s\"\n", - testdir ); - fflush( out_logfile ); - } + VERBOSE(3,-1,"V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '%s'", testdir ); /* * Let's pass in the path to the directory we most recently made so that we can use @@ -2098,28 +1822,20 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } MPI_Barrier(testComm); - endCreate = MPI_Wtime(); + endCreate = GetTimeStamp(); summary_table->rate[9] = num_dirs_in_tree / (endCreate - startCreate); summary_table->time[9] = endCreate - startCreate; summary_table->items[9] = num_dirs_in_tree; summary_table->stonewall_last_item[8] = num_dirs_in_tree; - if (verbose >= 1 && rank == 0) { - fprintf(out_logfile, "V-1: main Tree removal : %14.3f sec, %14.3f ops/sec\n", - (endCreate - startCreate), summary_table->rate[9]); - fflush(out_logfile); - } - - if (( rank == 0 ) && ( verbose >=2 )) { - fprintf( out_logfile, "V-2: main (at end of 
for j loop): Removing testdir of \"%s\"\n", testdir ); - fflush( out_logfile ); - } + VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[9]); + VERBOSE(2,-1,"main (at end of for j loop): Removing testdir of '%s'\n", testdir ); for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); if ((rank < path_count) && backend->access(testdir, F_OK, ¶m) == 0) { //if (( rank == 0 ) && access(testdir, F_OK) == 0) { if (backend->rmdir(testdir, ¶m) == -1) { - FAIL("unable to remove directory"); + FAIL("unable to remove directory %s", testdir); } } } @@ -2136,6 +1852,8 @@ void mdtest_init_args(){ create_only = 0; stat_only = 0; read_only = 0; + verify_read = 0; + verification_error = 0; remove_only = 0; leaf_only = 0; depth = 0; @@ -2143,6 +1861,7 @@ void mdtest_init_args(){ items_per_dir = 0; random_seed = 0; print_time = 0; + print_rate_and_time = 0; shared_file = 0; files_only = 0; dirs_only = 0; @@ -2156,11 +1875,15 @@ void mdtest_init_args(){ stone_wall_timer_seconds = 0; read_bytes = 0; sync_file = 0; + call_sync = 0; path_count = 0; nstride = 0; + make_node = 0; +#ifdef HAVE_LUSTRE_LUSTREAPI + global_dir_layout = 0; +#endif /* HAVE_LUSTRE_LUSTREAPI */ } - mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) { testComm = world_com; out_logfile = world_out; @@ -2170,7 +1893,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * mdtest_init_args(); int i, j; - int nodeCount; + int numNodes; + int numTasksOnNode0 = 0; MPI_Group worldgroup, testgroup; struct { int first; @@ -2187,12 +1911,13 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * char * path = "./out"; int randomize = 0; char APIs[1024]; - aiori_supported_apis(APIs); + char APIs_legacy[1024]; + aiori_supported_apis(APIs, APIs_legacy, MDTEST); char apiStr[1024]; sprintf(apiStr, "API for I/O [%s]", APIs); option_help options [] 
= { - {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & backend_name}, + {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & param.api}, {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & branch_factor}, {'d', NULL, "the directory in which the tests will run", OPTION_OPTIONAL_ARGUMENT, 's', & path}, {'B', NULL, "no barriers between phases", OPTION_OPTIONAL_ARGUMENT, 'd', & no_barriers}, @@ -2204,13 +1929,18 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & read_bytes}, {'f', NULL, "first number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & first}, {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & files_only}, +#ifdef HAVE_LUSTRE_LUSTREAPI + {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & global_dir_layout}, +#endif /* HAVE_LUSTRE_LUSTREAPI */ {'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations}, {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & items_per_dir}, + {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & make_node}, {'l', NULL, "last number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & last}, {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & leaf_only}, {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & items}, - {'N', NULL, "stride # between neighbor tasks for file/dir operation (local=0)", OPTION_OPTIONAL_ARGUMENT, 'd', & nstride}, + {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & nstride}, {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & pre_delay}, + 
{'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & print_rate_and_time}, {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize}, {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & random_seed}, {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride}, @@ -2223,41 +1953,42 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & write_bytes}, {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase)", OPTION_OPTIONAL_ARGUMENT, 'd', & stone_wall_timer_seconds}, {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, + {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & verify_read}, {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & sync_file}, + {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, LAST_OPTION }; - airoi_parse_options(argc, argv, options); + options_all_t * global_options = airoi_create_all_module_options(options); + option_parse(argc, argv, global_options); + updateParsedOptions(& param, global_options); - backend = aiori_select(backend_name); - if (NULL == backend) { - FAIL("Could not find suitable backend to use"); - } + free(global_options->modules); + free(global_options); + backend = param.backend; MPI_Comm_rank(testComm, &rank); MPI_Comm_size(testComm, &size); + if 
(backend->initialize) + backend->initialize(); + pid = getpid(); uid = getuid(); - nodeCount = size / CountTasksPerNode(testComm); + numNodes = GetNumNodes(testComm); + numTasksOnNode0 = GetNumTasksOnNode0(testComm); - if (rank == 0) { - fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp()); - fprintf(out_logfile, "mdtest-%s was launched with %d total task(s) on %d node(s)\n", - RELEASE_VERS, size, nodeCount); - fflush(out_logfile); + char cmd_buffer[4096]; + strncpy(cmd_buffer, argv[0], 4096); + for (i = 1; i < argc; i++) { + snprintf(&cmd_buffer[strlen(cmd_buffer)], 4096-strlen(cmd_buffer), " '%s'", argv[i]); } - if (rank == 0) { - fprintf(out_logfile, "Command line used: %s", argv[0]); - for (i = 1; i < argc; i++) { - fprintf(out_logfile, " \"%s\"", argv[i]); - } - fprintf(out_logfile, "\n"); - fflush(out_logfile); - } + VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp()); + VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, numNodes); + VERBOSE(0,-1,"Command line used: %s", cmd_buffer); /* adjust special variables */ barriers = ! no_barriers; @@ -2280,41 +2011,43 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } valid_tests(); - if (( rank == 0 ) && ( verbose >= 1 )) { - // option_print_current(options); - fprintf (out_logfile, "api : %s\n", backend_name); - fprintf( out_logfile, "barriers : %s\n", ( barriers ? "True" : "False" )); - fprintf( out_logfile, "collective_creates : %s\n", ( collective_creates ? "True" : "False" )); - fprintf( out_logfile, "create_only : %s\n", ( create_only ? "True" : "False" )); - fprintf( out_logfile, "dirpath(s):\n" ); - for ( i = 0; i < path_count; i++ ) { - fprintf( out_logfile, "\t%s\n", filenames[i] ); - } - fprintf( out_logfile, "dirs_only : %s\n", ( dirs_only ? "True" : "False" )); - fprintf( out_logfile, "read_bytes : "LLU"\n", read_bytes ); - fprintf( out_logfile, "read_only : %s\n", ( read_only ? 
"True" : "False" )); - fprintf( out_logfile, "first : %d\n", first ); - fprintf( out_logfile, "files_only : %s\n", ( files_only ? "True" : "False" )); - fprintf( out_logfile, "iterations : %d\n", iterations ); - fprintf( out_logfile, "items_per_dir : "LLU"\n", items_per_dir ); - fprintf( out_logfile, "last : %d\n", last ); - fprintf( out_logfile, "leaf_only : %s\n", ( leaf_only ? "True" : "False" )); - fprintf( out_logfile, "items : "LLU"\n", items ); - fprintf( out_logfile, "nstride : %d\n", nstride ); - fprintf( out_logfile, "pre_delay : %d\n", pre_delay ); - fprintf( out_logfile, "remove_only : %s\n", ( leaf_only ? "True" : "False" )); - fprintf( out_logfile, "random_seed : %d\n", random_seed ); - fprintf( out_logfile, "stride : %d\n", stride ); - fprintf( out_logfile, "shared_file : %s\n", ( shared_file ? "True" : "False" )); - fprintf( out_logfile, "time_unique_dir_overhead: %s\n", ( time_unique_dir_overhead ? "True" : "False" )); - fprintf( out_logfile, "stone_wall_timer_seconds: %d\n", stone_wall_timer_seconds); - fprintf( out_logfile, "stat_only : %s\n", ( stat_only ? "True" : "False" )); - fprintf( out_logfile, "unique_dir_per_task : %s\n", ( unique_dir_per_task ? "True" : "False" )); - fprintf( out_logfile, "write_bytes : "LLU"\n", write_bytes ); - fprintf( out_logfile, "sync_file : %s\n", ( sync_file ? "True" : "False" )); - fprintf( out_logfile, "depth : %d\n", depth ); - fflush( out_logfile ); - } + // option_print_current(options); + VERBOSE(1,-1, "api : %s", param.api); + VERBOSE(1,-1, "barriers : %s", ( barriers ? "True" : "False" )); + VERBOSE(1,-1, "collective_creates : %s", ( collective_creates ? "True" : "False" )); + VERBOSE(1,-1, "create_only : %s", ( create_only ? "True" : "False" )); + VERBOSE(1,-1, "dirpath(s):" ); + for ( i = 0; i < path_count; i++ ) { + VERBOSE(1,-1, "\t%s", filenames[i] ); + } + VERBOSE(1,-1, "dirs_only : %s", ( dirs_only ? 
"True" : "False" )); + VERBOSE(1,-1, "read_bytes : "LLU"", read_bytes ); + VERBOSE(1,-1, "read_only : %s", ( read_only ? "True" : "False" )); + VERBOSE(1,-1, "first : %d", first ); + VERBOSE(1,-1, "files_only : %s", ( files_only ? "True" : "False" )); +#ifdef HAVE_LUSTRE_LUSTREAPI + VERBOSE(1,-1, "global_dir_layout : %s", ( global_dir_layout ? "True" : "False" )); +#endif /* HAVE_LUSTRE_LUSTREAPI */ + VERBOSE(1,-1, "iterations : %d", iterations ); + VERBOSE(1,-1, "items_per_dir : "LLU"", items_per_dir ); + VERBOSE(1,-1, "last : %d", last ); + VERBOSE(1,-1, "leaf_only : %s", ( leaf_only ? "True" : "False" )); + VERBOSE(1,-1, "items : "LLU"", items ); + VERBOSE(1,-1, "nstride : %d", nstride ); + VERBOSE(1,-1, "pre_delay : %d", pre_delay ); + VERBOSE(1,-1, "remove_only : %s", ( leaf_only ? "True" : "False" )); + VERBOSE(1,-1, "random_seed : %d", random_seed ); + VERBOSE(1,-1, "stride : %d", stride ); + VERBOSE(1,-1, "shared_file : %s", ( shared_file ? "True" : "False" )); + VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( time_unique_dir_overhead ? "True" : "False" )); + VERBOSE(1,-1, "stone_wall_timer_seconds: %d", stone_wall_timer_seconds); + VERBOSE(1,-1, "stat_only : %s", ( stat_only ? "True" : "False" )); + VERBOSE(1,-1, "unique_dir_per_task : %s", ( unique_dir_per_task ? "True" : "False" )); + VERBOSE(1,-1, "write_bytes : "LLU"", write_bytes ); + VERBOSE(1,-1, "sync_file : %s", ( sync_file ? "True" : "False" )); + VERBOSE(1,-1, "call_sync : %s", ( call_sync ? 
"True" : "False" )); + VERBOSE(1,-1, "depth : %d", depth ); + VERBOSE(1,-1, "make_node : %d", make_node ); /* setup total number of items and number of items per dir */ if (depth <= 0) { @@ -2330,7 +2063,11 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } if (items_per_dir > 0) { if(items == 0){ - items = items_per_dir * num_dirs_in_tree; + if (leaf_only) { + items = items_per_dir * (uint64_t) pow(branch_factor, depth); + } else { + items = items_per_dir * num_dirs_in_tree; + } }else{ num_dirs_in_tree_calc = num_dirs_in_tree; } @@ -2388,18 +2125,18 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* allocate and initialize write buffer with # */ if (write_bytes > 0) { - write_buffer = (char *)malloc(write_bytes); - if (write_buffer == NULL) { + int alloc_res = posix_memalign((void**)&write_buffer, sysconf(_SC_PAGESIZE), write_bytes); + if (alloc_res) { FAIL("out of memory"); } - memset(write_buffer, 0x23, write_bytes); + generate_memory_pattern(write_buffer, write_bytes); } /* setup directory path to work in */ if (path_count == 0) { /* special case where no directory path provided with '-d' option */ char *ret = getcwd(testdirpath, MAX_PATHLEN); if (ret == NULL) { - FAIL("Unable to get current working directory"); + FAIL("Unable to get current working directory on %s", testdirpath); } path_count = 1; } else { @@ -2409,26 +2146,31 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* if directory does not exist, create it */ if ((rank < path_count) && backend->access(testdirpath, F_OK, ¶m) != 0) { if (backend->mkdir(testdirpath, DIRMODE, ¶m) != 0) { - FAIL("Unable to create test directory path"); + FAIL("Unable to create test directory path %s", testdirpath); } } /* display disk usage */ - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: main (before display_freespace): testdirpath is \"%s\"\n", testdirpath ); - fflush( out_logfile ); - } + 
VERBOSE(3,-1,"main (before display_freespace): testdirpath is '%s'", testdirpath ); if (rank == 0) display_freespace(testdirpath); + int tasksBlockMapping = QueryNodeMapping(testComm, true); - if (verbose >= 3 && rank == 0) { - fprintf(out_logfile, "V-3: main (after display_freespace): testdirpath is \"%s\"\n", testdirpath ); - fflush( out_logfile ); + /* set the shift to mimic IOR and shift by procs per node */ + if (nstride > 0) { + if ( numNodes > 1 && tasksBlockMapping ) { + /* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks + however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */ + nstride *= numTasksOnNode0; + } + VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride); } + VERBOSE(3,-1,"main (after display_freespace): testdirpath is '%s'", testdirpath ); + if (rank == 0) { if (random_seed > 0) { - fprintf(out_logfile, "random seed: %d\n", random_seed); + VERBOSE(0,-1,"random seed: %d", random_seed); } } @@ -2444,7 +2186,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* setup summary table for recording results */ summary_table = (mdtest_results_t *) malloc(iterations * sizeof(mdtest_results_t)); - memset(summary_table, 0, sizeof(mdtest_results_t)); + memset(summary_table, 0, iterations * sizeof(mdtest_results_t)); for(int i=0; i < iterations; i++){ for(int j=0; j < MDTEST_LAST_NUM; j++){ summary_table[i].rate[j] = 0.0; @@ -2481,41 +2223,48 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * items_all *= num_dirs_in_tree_calc; } if (files_only && dirs_only) { - fprintf(out_logfile, "\n%d tasks, "LLU" files/directories\n", i, items_all); + VERBOSE(0,-1,"%d tasks, "LLU" files/directories", i, items_all); } else if (files_only) { if (!shared_file) { - fprintf(out_logfile, "\n%d tasks, "LLU" files\n", i, items_all); + VERBOSE(0,-1,"%d tasks, "LLU" files", i, 
items_all); } else { - fprintf(out_logfile, "\n%d tasks, 1 file\n", i); + VERBOSE(0,-1,"%d tasks, 1 file", i); } } else if (dirs_only) { - fprintf(out_logfile, "\n%d tasks, "LLU" directories\n", i, items_all); + VERBOSE(0,-1,"%d tasks, "LLU" directories", i, items_all); } } - if (rank == 0 && verbose >= 1) { - fprintf(out_logfile, "\n"); - fprintf(out_logfile, " Operation Duration Rate\n"); - fprintf(out_logfile, " --------- -------- ----\n"); - } + VERBOSE(1,-1,""); + VERBOSE(1,-1," Operation Duration Rate"); + VERBOSE(1,-1," --------- -------- ----"); for (j = 0; j < iterations; j++) { // keep track of the current status for stonewalling mdtest_iteration(i, j, testgroup, & summary_table[j]); } - summarize_results(iterations); + if (print_rate_and_time){ + summarize_results(iterations, 0); + summarize_results(iterations, 1); + }else{ + summarize_results(iterations, print_time); + } if (i == 1 && stride > 1) { i = 0; } } - if (rank == 0) { - fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp()); - fflush(out_logfile); + if(verification_error){ + VERBOSE(0, -1, "\nERROR: verifying the data read! 
Take the performance values with care!\n"); } + VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp()); if (random_seed > 0) { free(rand_array); } + + if (backend->finalize) + backend->finalize(); + return summary_table; } diff --git a/src/option.c b/src/option.c index 4e880555..2c3e8ef3 100644 --- a/src/option.c +++ b/src/option.c @@ -89,6 +89,10 @@ static int print_value(option_help * o){ pos += printf("=%lld", *(long long*) o->variable); break; } + case('u'):{ + pos += printf("=%lu", *(uint64_t*) o->variable); + break; + } } } if (o->arg == OPTION_FLAG && (*(int*)o->variable) != 0){ @@ -180,6 +184,10 @@ static int print_option_value(option_help * o){ pos += printf("=%lld", *(long long*) o->variable); break; } + case('u'):{ + pos += printf("=%lu", *(uint64_t*) o->variable); + break; + } } }else{ //printf(" "); @@ -220,146 +228,187 @@ void option_print_current(option_help * args){ print_current_option_section(args, OPTION_FLAG); } -int option_parse(int argc, char ** argv, options_all * opt_all){ - int error = 0; - int requiredArgsSeen = 0; - int requiredArgsNeeded = 0; - int i; - int printhelp = 0; +static void option_parse_token(char ** argv, int * flag_parsed_next, int * requiredArgsSeen, options_all_t * opt_all, int * error, int * print_help){ + char * txt = argv[0]; + char * arg = strstr(txt, "="); + + int replaced_equal = 0; + int i = 0; + if(arg != NULL){ + arg[0] = 0; + arg++; + replaced_equal = 1; + } + *flag_parsed_next = 0; for(int m = 0; m < opt_all->module_count; m++ ){ option_help * args = opt_all->modules[m].options; if(args == NULL) continue; - for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 ; o++ ){ - if(o->arg == OPTION_REQUIRED_ARGUMENT){ - requiredArgsNeeded++; + // try to find matching option help + for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++ ){ + if( o->shortVar == 0 && o->longVar == 0 ){ + // section + continue; } - } - } - - for(i=1; i < argc; i++){ - char * txt = argv[i]; - int 
foundOption = 0; - char * arg = strstr(txt, "="); - int replaced_equal = 0; - if(arg != NULL){ - arg[0] = 0; - arg++; - replaced_equal = 1; - } - - for(int m = 0; m < opt_all->module_count; m++ ){ - option_help * args = opt_all->modules[m].options; - if(args == NULL) continue; - // try to find matching option help - for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++ ){ - if( o->shortVar == 0 && o->longVar == 0 ){ - // section - continue; - } - if ( (txt[0] == '-' && o->shortVar == txt[1]) || (strlen(txt) > 2 && txt[0] == '-' && txt[1] == '-' && o->longVar != NULL && strcmp(txt + 2, o->longVar) == 0)){ - foundOption = 1; - - // now process the option. - switch(o->arg){ - case (OPTION_FLAG):{ - assert(o->type == 'd'); + if ( (txt[0] == '-' && o->shortVar == txt[1]) || (strlen(txt) > 2 && txt[0] == '-' && txt[1] == '-' && o->longVar != NULL && strcmp(txt + 2, o->longVar) == 0)){ + // now process the option. + switch(o->arg){ + case (OPTION_FLAG):{ + assert(o->type == 'd'); + if(arg != NULL){ + int val = atoi(arg); + (*(int*) o->variable) = (val < 0) ? 0 : val; + }else{ (*(int*) o->variable)++; - break; } - case (OPTION_OPTIONAL_ARGUMENT): - case (OPTION_REQUIRED_ARGUMENT):{ - // check if next is an argument - if(arg == NULL){ - if(o->shortVar == txt[1] && txt[2] != 0){ - arg = & txt[2]; - }else{ - // simply take the next value as argument - i++; - arg = argv[i]; - } + break; + } + case (OPTION_OPTIONAL_ARGUMENT): + case (OPTION_REQUIRED_ARGUMENT):{ + // check if next is an argument + if(arg == NULL){ + if(o->shortVar == txt[1] && txt[2] != 0){ + arg = & txt[2]; + }else{ + // simply take the next value as argument + i++; + arg = argv[1]; + *flag_parsed_next = 1; } + } - if(arg == NULL){ - const char str[] = {o->shortVar, 0}; - printf("Error, argument missing for option %s\n", (o->longVar != NULL) ? 
o->longVar : str); - exit(1); - } + if(arg == NULL){ + const char str[] = {o->shortVar, 0}; + printf("Error, argument missing for option %s\n", (o->longVar != NULL) ? o->longVar : str); + exit(1); + } - switch(o->type){ - case('p'):{ - // call the function in the variable - void(*fp)() = o->variable; - fp(arg); - break; - } - case('F'):{ - *(double*) o->variable = atof(arg); - break; - } - case('f'):{ - *(float*) o->variable = atof(arg); - break; - } - case('d'):{ - int64_t val = string_to_bytes(arg); - if (val > INT_MAX || val < INT_MIN){ - printf("WARNING: parsing the number %s to integer, this produced an overflow!\n", arg); - } - *(int*) o->variable = val; - break; - } - case('H'): - case('s'):{ - (*(char **) o->variable) = strdup(arg); - break; - } - case('c'):{ - (*(char *)o->variable) = arg[0]; - if(strlen(arg) > 1){ - printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar); - } - break; + switch(o->type){ + case('p'):{ + // call the function in the variable + void(*fp)() = o->variable; + fp(arg); + break; + } + case('F'):{ + *(double*) o->variable = atof(arg); + break; + } + case('f'):{ + *(float*) o->variable = atof(arg); + break; + } + case('d'):{ + int64_t val = string_to_bytes(arg); + if (val > INT_MAX || val < INT_MIN){ + printf("WARNING: parsing the number %s to integer, this produced an overflow!\n", arg); } - case('l'):{ - *(long long*) o->variable = string_to_bytes(arg); - break; + *(int*) o->variable = val; + break; + } + case('H'): + case('s'):{ + (*(char **) o->variable) = strdup(arg); + break; + } + case('c'):{ + (*(char *)o->variable) = arg[0]; + if(strlen(arg) > 1){ + printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar); } - default: - printf("ERROR: Unknown option type %c\n", o->type); + break; + } + case('l'):{ + *(long long*) o->variable = string_to_bytes(arg); + break; + } + case('u'):{ + *(uint64_t*) o->variable = string_to_bytes(arg); + break; } + default: + 
printf("ERROR: Unknown option type %c\n", o->type); } } - if(replaced_equal){ - arg[-1] = '='; - } - - if(o->arg == OPTION_REQUIRED_ARGUMENT){ - requiredArgsSeen++; - } + } + if(replaced_equal){ + arg[-1] = '='; + } - break; + if(o->arg == OPTION_REQUIRED_ARGUMENT){ + (*requiredArgsSeen)++; } + + return; } } - if (! foundOption){ - if(strcmp(txt, "-h") == 0 || strcmp(txt, "--help") == 0){ - printhelp = 1; - }else{ - printf("Error invalid argument: %s\n", txt); - error = 1; - } + } + + if(strcmp(txt, "-h") == 0 || strcmp(txt, "--help") == 0){ + *print_help = 1; + }else{ + *error = 1; + } +} + +int option_parse_str(char*val, options_all_t * opt_all){ + int flag_parsed_next; + int error = 0; + int requiredArgsSeen = 0; + int print_help = 0; + char * argv[2] = {val, NULL}; + option_parse_token(argv, & flag_parsed_next, & requiredArgsSeen, opt_all, & error, & print_help); + return error; +} + +int option_parse_key_value(char * key, char *val, options_all_t * opt_all){ + int flag_parsed_next; + int error = 0; + int requiredArgsSeen = 0; + int print_help = 0; + char value[1024]; + sprintf(value, "%s=%s", key, val); + char * argv[2] = {value, NULL}; + option_parse_token(argv, & flag_parsed_next, & requiredArgsSeen, opt_all, & error, & print_help); + return error; +} + +int option_parse(int argc, char ** argv, options_all_t * opt_all){ + int error = 0; + int requiredArgsSeen = 0; + int requiredArgsNeeded = 0; + int i; + int printhelp = 0; + + for(int m = 0; m < opt_all->module_count; m++ ){ + option_help * args = opt_all->modules[m].options; + if(args == NULL) continue; + for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 ; o++ ){ + if(o->arg == OPTION_REQUIRED_ARGUMENT){ + requiredArgsNeeded++; + } + } + } + + for(i=1; i < argc; i++){ + int flag_parsed_next; + option_parse_token(& argv[i], & flag_parsed_next, & requiredArgsSeen, opt_all, & error, & printhelp); + if (flag_parsed_next){ + i++; + } + if(error){ + printf("Error invalid argument: %s\n", argv[i]); } 
} if( requiredArgsSeen != requiredArgsNeeded ){ printf("Error: Missing some required arguments\n\n"); - printhelp = -1; + printhelp = 1; } if(error != 0){ printf("Invalid options\n"); - printhelp = -1; + printhelp = 1; } if(printhelp == 1){ diff --git a/src/option.h b/src/option.h index 53a3732b..624da510 100644 --- a/src/option.h +++ b/src/option.h @@ -26,19 +26,23 @@ typedef struct{ typedef struct{ char * prefix; // may be NULL to include it in the standard name option_help * options; + void * defaults; // these default values are taken from the command line } option_module; typedef struct{ int module_count; option_module * modules; -} options_all; +} options_all_t; #define LAST_OPTION {0, 0, 0, (option_value_type) 0, 0, NULL} int64_t string_to_bytes(char *size_str); void option_print_current(option_help * args); - //@return the number of parsed arguments -int option_parse(int argc, char ** argv, options_all * args); +int option_parse(int argc, char ** argv, options_all_t * args); +int option_parse_str(char*val, options_all_t * opt_all); + +/* Parse a single line */ +int option_parse_key_value(char * key, char * value, options_all_t * opt_all); #endif diff --git a/src/parse_options.c b/src/parse_options.c index 3964d25c..607d0147 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -21,6 +21,9 @@ #include #include +#if defined(HAVE_STRINGS_H) +#include +#endif #include "utilities.h" #include "ior.h" @@ -33,33 +36,11 @@ IOR_param_t initialTestParams; +option_help * createGlobalOptions(IOR_param_t * params); -static size_t NodeMemoryStringToBytes(char *size_str) -{ - int percent; - int rc; - long page_size; - long num_pages; - long long mem; - - rc = sscanf(size_str, " %d %% ", &percent); - if (rc == 0) - return (size_t) string_to_bytes(size_str); - if (percent > 100 || percent < 0) - ERR("percentage must be between 0 and 100"); - - page_size = sysconf(_SC_PAGESIZE); -#ifdef _SC_PHYS_PAGES - num_pages = sysconf(_SC_PHYS_PAGES); - if (num_pages == -1) - 
ERR("sysconf(_SC_PHYS_PAGES) is not supported"); -#else - ERR("sysconf(_SC_PHYS_PAGES) is not supported"); -#endif - mem = page_size * num_pages; - return mem / 100 * percent; -} +static IOR_param_t * parameters; +static options_all_t * global_options; /* @@ -88,9 +69,8 @@ static void CheckRunSettings(IOR_test_t *tests) * (We assume int-valued params are exclusively 0 or 1.) */ if ((params->openFlags & IOR_RDWR) - && ((params->readFile | params->checkRead) - ^ (params->writeFile | params->checkWrite)) - && (params->openFlags & IOR_RDWR)) { + && ((params->readFile | params->checkRead | params->checkWrite) + ^ params->writeFile)) { params->openFlags &= ~(IOR_RDWR); if (params->readFile | params->checkRead) { @@ -100,14 +80,13 @@ static void CheckRunSettings(IOR_test_t *tests) else params->openFlags |= IOR_WRONLY; } - } } /* * Set flags from commandline string/value pairs. */ -void DecodeDirective(char *line, IOR_param_t *params) +void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_options) { char option[MAX_STR]; char value[MAX_STR]; @@ -126,6 +105,12 @@ void DecodeDirective(char *line, IOR_param_t *params) } if (strcasecmp(option, "api") == 0) { params->api = strdup(value); + + params->backend = aiori_select(params->api); + if (params->backend == NULL){ + fprintf(out_logfile, "Could not load backend API %s\n", params->api); + exit(-1); + } } else if (strcasecmp(option, "summaryFile") == 0) { if (rank == 0){ out_resultfile = fopen(value, "w"); @@ -166,12 +151,18 @@ void DecodeDirective(char *line, IOR_param_t *params) params->maxTimeDuration = atoi(value); } else if (strcasecmp(option, "outlierthreshold") == 0) { params->outlierThreshold = atoi(value); - } else if (strcasecmp(option, "nodes") == 0) { - params->nodes = atoi(value); + } else if (strcasecmp(option, "numnodes") == 0) { + params->numNodes = atoi(value); + } else if (strcasecmp(option, "numtasks") == 0) { + params->numTasks = atoi(value); + } else if (strcasecmp(option, 
"numtasksonnode0") == 0) { + params->numTasksOnNode0 = atoi(value); } else if (strcasecmp(option, "repetitions") == 0) { params->repetitions = atoi(value); } else if (strcasecmp(option, "intertestdelay") == 0) { params->interTestDelay = atoi(value); + } else if (strcasecmp(option, "interiodelay") == 0) { + params->interIODelay = atoi(value); } else if (strcasecmp(option, "readfile") == 0) { params->readFile = atoi(value); } else if (strcasecmp(option, "writefile") == 0) { @@ -232,8 +223,6 @@ void DecodeDirective(char *line, IOR_param_t *params) params->useFileView = atoi(value); } else if (strcasecmp(option, "usesharedfilepointer") == 0) { params->useSharedFilePointer = atoi(value); - } else if (strcasecmp(option, "useo_direct") == 0) { - params->useO_DIRECT = atoi(value); } else if (strcasecmp(option, "usestrideddatatype") == 0) { params->useStridedDatatype = atoi(value); } else if (strcasecmp(option, "showhints") == 0) { @@ -301,37 +290,60 @@ void DecodeDirective(char *line, IOR_param_t *params) params->beegfs_chunkSize = string_to_bytes(value); if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16)) ERR("beegfsChunkSize must be a power of two and >64k"); - } else if (strcasecmp(option, "numtasks") == 0) { - params->numTasks = atoi(value); } else if (strcasecmp(option, "summaryalways") == 0) { params->summary_every_test = atoi(value); } else { - if (rank == 0) - fprintf(out_logfile, "Unrecognized parameter \"%s\"\n", - option); - MPI_CHECK(MPI_Initialized(&initialized), "MPI_Initialized() error"); - if (initialized) - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - else - exit(-1); + // backward compatibility for now + if (strcasecmp(option, "useo_direct") == 0) { + strcpy(option, "--posix.odirect"); + } + int parsing_error = option_parse_key_value(option, value, module_options); + if(parsing_error){ + if (rank == 0) + fprintf(out_logfile, "Unrecognized parameter \"%s\"\n", + option); + 
MPI_CHECK(MPI_Initialized(&initialized), "MPI_Initialized() error"); + if (initialized) + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + else + exit(-1); + } } } + /* * Parse a single line, which may contain multiple comma-seperated directives */ -void ParseLine(char *line, IOR_param_t * test) +void ParseLine(char *line, IOR_param_t * test, options_all_t * module_options) { char *start, *end; - start = line; + char * newline = strdup(line); + start = newline; do { + end = strchr(start, '#'); + if (end != NULL){ + *end = '\0'; + end = NULL; // stop parsing after comment + } end = strchr(start, ','); - if (end != NULL) - *end = '\0'; - DecodeDirective(start, test); + if (end != NULL){ + *end = '\0'; + } + if(strlen(start) < 3){ + fprintf(out_logfile, "Invalid option substring string: \"%s\" in \"%s\"\n", start, line); + exit(1); + } + DecodeDirective(start, test, module_options); start = end + 1; } while (end != NULL); + free(newline); +} + + +static void decodeDirectiveWrapper(char *line){ + ParseLine(line, parameters, global_options); } /* @@ -370,13 +382,17 @@ IOR_test_t *ReadConfigScript(char *scriptName) int runflag = 0; char linebuf[MAX_STR]; char empty[MAX_STR]; + char *ptr; FILE *file; IOR_test_t *head = NULL; IOR_test_t *tail = NULL; + option_help ** option_p = & global_options->modules[0].options; + /* Initialize the first test */ - head = CreateTest(&initialTestParams, test_num++); + head = CreateTest(& initialTestParams, test_num++); tail = head; + *option_p = createGlobalOptions(& ((IOR_test_t*) head)->params); /* The current options */ /* open the script */ file = fopen(scriptName, "r"); @@ -392,21 +408,31 @@ IOR_test_t *ReadConfigScript(char *scriptName) /* Iterate over a block of IOR commands */ while (fgets(linebuf, MAX_STR, file) != NULL) { + /* skip over leading whitespace */ + ptr = linebuf; + while (isspace(*ptr)) + ptr++; + /* skip empty lines */ - if (sscanf(linebuf, "%s", empty) == -1) + if (sscanf(ptr, "%s", empty) == -1) 
continue; + /* skip lines containing only comments */ - if (sscanf(linebuf, " #%s", empty) == 1) + if (sscanf(ptr, " #%s", empty) == 1) continue; - if (contains_only(linebuf, "ior stop")) { + + if (contains_only(ptr, "ior stop")) { break; - } else if (contains_only(linebuf, "run")) { + } else if (contains_only(ptr, "run")) { if (runflag) { /* previous line was a "run" as well create duplicate test */ tail->next = CreateTest(&tail->params, test_num++); AllocResults(tail); + ((IOR_test_t*) tail)->params.backend_options = airoi_update_module_options(((IOR_test_t*) tail)->params.backend, global_options); + tail = tail->next; + *option_p = createGlobalOptions(& ((IOR_test_t*) tail->next)->params); } runflag = 1; } else if (runflag) { @@ -414,154 +440,126 @@ IOR_test_t *ReadConfigScript(char *scriptName) create and initialize a new test structure */ runflag = 0; tail->next = CreateTest(&tail->params, test_num++); + *option_p = createGlobalOptions(& ((IOR_test_t*) tail->next)->params); AllocResults(tail); + ((IOR_test_t*) tail)->params.backend_options = airoi_update_module_options(((IOR_test_t*) tail)->params.backend, global_options); + tail = tail->next; - ParseLine(linebuf, &tail->params); + ParseLine(ptr, &tail->params, global_options); } else { - ParseLine(linebuf, &tail->params); + ParseLine(ptr, &tail->params, global_options); } } /* close the script */ if (fclose(file) != 0) ERR("fclose() of script file failed"); - AllocResults(tail); + AllocResults(tail); /* copy the actual module options into the test */ + ((IOR_test_t*) tail)->params.backend_options = airoi_update_module_options(((IOR_test_t*) tail)->params.backend, global_options); return head; } -static IOR_param_t * parameters; - -static void decodeDirectiveWrapper(char *line){ - DecodeDirective(line, parameters); +option_help * createGlobalOptions(IOR_param_t * params){ + char APIs[1024]; + char APIs_legacy[1024]; + aiori_supported_apis(APIs, APIs_legacy, IOR); + char apiStr[1024]; + sprintf(apiStr, "API for 
I/O [%s]", APIs); + + option_help o [] = { + {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & params->api}, + {'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & params->referenceNumber}, + {'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->blockSize}, + {'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & params->collective}, + {'C', NULL, "reorderTasks -- changes task ordering for readback (useful to avoid client cache)", OPTION_FLAG, 'd', & params->reorderTasks}, + {'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & params->interTestDelay}, + {'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & params->deadlineForStonewalling}, + {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, + {'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync}, + {'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & params->useExistingTestFile}, + {'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts}, + {'F', NULL, "filePerProc -- file-per-process", OPTION_FLAG, 'd', & params->filePerProc}, + {'g', NULL, "intraTestBarriers -- use barriers between open, write/read, and close", OPTION_FLAG, 
'd', & params->intraTestBarriers}, + /* This option toggles between Incompressible Seed and Time stamp sig based on -l, + * so we'll toss the value in both for now, and sort it out in initialization + * after all the arguments are in and we know which it keep. + */ + {'G', NULL, "setTimeStampSignature -- set value for time stamp signature/random seed", OPTION_OPTIONAL_ARGUMENT, 'd', & params->setTimeStampSignature}, + {'H', NULL, "showHints -- show hints", OPTION_FLAG, 'd', & params->showHints}, + {'i', NULL, "repetitions -- number of repetitions of test", OPTION_OPTIONAL_ARGUMENT, 'd', & params->repetitions}, + {'I', NULL, "individualDataSets -- datasets not shared by all procs [not working]", OPTION_FLAG, 'd', & params->individualDataSets}, + {'j', NULL, "outlierThreshold -- warn on outlier N seconds from mean", OPTION_OPTIONAL_ARGUMENT, 'd', & params->outlierThreshold}, + {'J', NULL, "setAlignment -- HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->setAlignment}, + {'k', NULL, "keepFile -- don't remove the test file(s) on program exit", OPTION_FLAG, 'd', & params->keepFile}, + {'K', NULL, "keepFileWithError -- keep error-filled file(s) after data-checking", OPTION_FLAG, 'd', & params->keepFileWithError}, + {'l', NULL, "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & params->buffer_type}, + {'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile}, + {'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr}, + {'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & params->noFill}, + {'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, + {'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 
's', & params->testFileName}, + {'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, + {'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & params->preallocate}, + {'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & params->useSharedFilePointer}, + {'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & params->quitOnError}, + {'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->taskPerNodeOffset}, + {'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & params->readFile}, + {'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & params->checkRead}, + {'s', NULL, "segmentCount -- number of segments", OPTION_OPTIONAL_ARGUMENT, 'd', & params->segmentCount}, + {'S', NULL, "useStridedDatatype -- put strided access into datatype [not working]", OPTION_FLAG, 'd', & params->useStridedDatatype}, + {'t', NULL, "transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->transferSize}, + {'T', NULL, "maxTimeDuration -- max time in minutes executing repeated test; it aborts only between iterations and not within a test!", OPTION_OPTIONAL_ARGUMENT, 'd', & params->maxTimeDuration}, + {'u', NULL, "uniqueDir -- use unique directory name for each file-per-process", OPTION_FLAG, 'd', & params->uniqueDir}, + {'U', NULL, "hintsFileName -- full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & params->hintsFileName}, + {'v', NULL, "verbose -- output information (repeating flag increases level)", OPTION_FLAG, 'd', & params->verbose}, + {'V', NULL, "useFileView -- use MPI_File_set_view", OPTION_FLAG, 'd', & params->useFileView}, + {'w', NULL, "writeFile -- write file", OPTION_FLAG, 
'd', & params->writeFile}, + {'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & params->checkWrite}, + {'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & params->singleXferAttempt}, + {'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & params->reorderTasksRandomSeed}, + {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, + {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, + {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, + {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, + {0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun}, + LAST_OPTION, + }; + option_help * options = malloc(sizeof(o)); + memcpy(options, & o, sizeof(o)); + return options; } + /* * Parse Commandline. 
*/ IOR_test_t *ParseCommandLine(int argc, char **argv) { - char * testscripts = NULL; - int toggleG = FALSE; - char * buffer_type = ""; - char * memoryPerNode = NULL; init_IOR_Param_t(& initialTestParams); - parameters = & initialTestParams; - char APIs[1024]; - aiori_supported_apis(APIs); - char apiStr[1024]; - sprintf(apiStr, "API for I/O [%s]", APIs); - - option_help options [] = { - {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.api}, - {'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.referenceNumber}, - {'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & initialTestParams.blockSize}, - {'B', NULL, "useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers", OPTION_FLAG, 'd', & initialTestParams.useO_DIRECT}, - {'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & initialTestParams.collective}, - {'C', NULL, "reorderTasks -- changes task ordering to n+1 ordering for readback", OPTION_FLAG, 'd', & initialTestParams.reorderTasks}, - {'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.interTestDelay}, - {'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.deadlineForStonewalling}, - {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, - {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, - {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, - {'e', NULL, "fsync -- 
perform sync operation after each block write", OPTION_FLAG, 'd', & initialTestParams.fsync}, - {'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & initialTestParams.useExistingTestFile}, - {'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & testscripts}, - {'F', NULL, "filePerProc -- file-per-process", OPTION_FLAG, 'd', & initialTestParams.filePerProc}, - {'g', NULL, "intraTestBarriers -- use barriers between open, write/read, and close", OPTION_FLAG, 'd', & initialTestParams.intraTestBarriers}, - /* This option toggles between Incompressible Seed and Time stamp sig based on -l, - * so we'll toss the value in both for now, and sort it out in initialization - * after all the arguments are in and we know which it keep. - */ - {'G', NULL, "setTimeStampSignature -- set value for time stamp signature/random seed", OPTION_OPTIONAL_ARGUMENT, 'd', & toggleG}, - {'H', NULL, "showHints -- show hints", OPTION_FLAG, 'd', & initialTestParams.showHints}, - {'i', NULL, "repetitions -- number of repetitions of test", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.repetitions}, - {'I', NULL, "individualDataSets -- datasets not shared by all procs [not working]", OPTION_FLAG, 'd', & initialTestParams.individualDataSets}, - {'j', NULL, "outlierThreshold -- warn on outlier N seconds from mean", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.outlierThreshold}, - {'J', NULL, "setAlignment -- HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.setAlignment}, - {'k', NULL, "keepFile -- don't remove the test file(s) on program exit", OPTION_FLAG, 'd', & initialTestParams.keepFile}, - {'K', NULL, "keepFileWithError -- keep error-filled file(s) after data-checking", OPTION_FLAG, 'd', & initialTestParams.keepFileWithError}, - {'l', NULL, "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 
's', & buffer_type}, - {'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & initialTestParams.multiFile}, - {'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & memoryPerNode}, - {'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & initialTestParams.noFill}, - {'N', NULL, "numTasks -- number of tasks that should participate in the test", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.numTasks}, - {'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.testFileName}, - {'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, - {'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & initialTestParams.preallocate}, - {'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & initialTestParams.useSharedFilePointer}, - {'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & initialTestParams.quitOnError}, - {'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.taskPerNodeOffset}, - {'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & initialTestParams.readFile}, - {'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & initialTestParams.checkRead}, - {'s', NULL, "segmentCount -- number of segments", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.segmentCount}, - {'S', NULL, "useStridedDatatype -- put strided access into datatype [not working]", OPTION_FLAG, 'd', & initialTestParams.useStridedDatatype}, - {'t', NULL, "transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & initialTestParams.transferSize}, - {'T', 
NULL, "maxTimeDuration -- max time in minutes executing repeated test; it aborts only between iterations and not within a test!", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.maxTimeDuration}, - {'u', NULL, "uniqueDir -- use unique directory name for each file-per-process", OPTION_FLAG, 'd', & initialTestParams.uniqueDir}, - {'U', NULL, "hintsFileName -- full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.hintsFileName}, - {'v', NULL, "verbose -- output information (repeating flag increases level)", OPTION_FLAG, 'd', & initialTestParams.verbose}, - {'V', NULL, "useFileView -- use MPI_File_set_view", OPTION_FLAG, 'd', & initialTestParams.useFileView}, - {'w', NULL, "writeFile -- write file", OPTION_FLAG, 'd', & initialTestParams.writeFile}, - {'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & initialTestParams.checkWrite}, - {'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & initialTestParams.singleXferAttempt}, - {'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.reorderTasksRandomSeed}, - {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & initialTestParams.fsyncPerWrite}, - {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & initialTestParams.randomOffset}, - {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & initialTestParams.reorderTasksRandom}, - {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, - {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, - LAST_OPTION, - }; - - IOR_test_t *tests = NULL; - - GetPlatformName(initialTestParams.platform); - airoi_parse_options(argc, argv, options); - - if (toggleG){ - 
initialTestParams.setTimeStampSignature = toggleG; - initialTestParams.incompressibleSeed = toggleG; - } + IOR_test_t *tests = NULL; - if (buffer_type[0] != 0){ - switch(buffer_type[0]) { - case 'i': /* Incompressible */ - initialTestParams.dataPacketType = incompressible; - break; - case 't': /* timestamp */ - initialTestParams.dataPacketType = timestamp; - break; - case 'o': /* offset packet */ - initialTestParams.storeFileOffset = TRUE; - initialTestParams.dataPacketType = offset; - break; - default: - fprintf(out_logfile, - "Unknown arguement for -l %s; generic assumed\n", buffer_type); - break; - } - } - if (memoryPerNode){ - initialTestParams.memoryPerNode = NodeMemoryStringToBytes(optarg); - } - const ior_aiori_t * backend = aiori_select(initialTestParams.api); - if (backend == NULL) - ERR_SIMPLE("unrecognized I/O API"); - - initialTestParams.backend = backend; - initialTestParams.apiVersion = backend->get_version(); - - if (testscripts){ - tests = ReadConfigScript(testscripts); - }else{ - tests = CreateTest(&initialTestParams, 0); - AllocResults(tests); - } + initialTestParams.platform = GetPlatformName(); + + option_help * options = createGlobalOptions( & initialTestParams); + parameters = & initialTestParams; + global_options = airoi_create_all_module_options(options); + option_parse(argc, argv, global_options); + updateParsedOptions(& initialTestParams, global_options); + + if (initialTestParams.testscripts){ + tests = ReadConfigScript(initialTestParams.testscripts); + }else{ + tests = CreateTest(&initialTestParams, 0); + AllocResults(tests); + } - CheckRunSettings(tests); + CheckRunSettings(tests); - return (tests); + return (tests); } diff --git a/src/test/Makefile.am b/src/test/Makefile.am new file mode 100755 index 00000000..1f2b1413 --- /dev/null +++ b/src/test/Makefile.am @@ -0,0 +1,8 @@ +LDFLAGS = $(extraLDFLAGS) +LDADD = ../libaiori.a $(extraLDADD) + +# Add test here +TESTS = testlib testexample +check_PROGRAMS = $(TESTS) +testexample_SOURCES = 
example.c +testlib_SOURCES = lib.c diff --git a/src/test/example.c b/src/test/example.c new file mode 100644 index 00000000..5bb4b2b5 --- /dev/null +++ b/src/test/example.c @@ -0,0 +1,31 @@ +#include + +#include +#include + +// build a single test via, e.g., mpicc example.c -I ../src/ ../src/libaiori.a -lm + +int main(){ + IOR_param_t test; + init_IOR_Param_t(& test); + test.blockSize = 10; + test.transferSize = 10; + test.segmentCount = 5; + test.numTasks = 2; + + // having an individual file + test.filePerProc = 1; + + IOR_offset_t * offsets; + offsets = GetOffsetArraySequential(& test, 0); + assert(offsets[0] == 0); + assert(offsets[1] == 10); + assert(offsets[2] == 20); + assert(offsets[3] == 30); + assert(offsets[4] == 40); + // for(int i = 0; i < test.segmentCount; i++){ + // printf("%lld\n", (long long int) offsets[i]); + // } + printf("OK\n"); + return 0; +} diff --git a/src/test/lib.c b/src/test/lib.c index 55ca6f00..c163f54b 100644 --- a/src/test/lib.c +++ b/src/test/lib.c @@ -16,6 +16,11 @@ int main(int argc, char ** argv){ fprintf(stderr, "Could not run ior\n"); ret = 1; } + else + { + free(res->params.platform); + free(res); + } } if (rank == 0){ char * param[] = {"./mdtest", "-a", "DUMMY"}; diff --git a/src/utilities.c b/src/utilities.c index b71ce41d..bcb1e03c 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -20,6 +20,7 @@ # define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */ #endif /* __linux__ */ +#include #include #include #include @@ -52,11 +53,9 @@ extern int errno; extern int numTasks; -/* globals used by other files, also defined "extern" in ior.h */ -int numTasksWorld = 0; +/* globals used by other files, also defined "extern" in utilities.h */ int rank = 0; int rankOffset = 0; -int tasksPerNode = 0; /* tasks per node */ int verbose = VERBOSE_0; /* verbose output */ MPI_Comm testComm; MPI_Comm mpi_comm_world; @@ -66,6 +65,94 @@ enum OutputFormat_t outputFormat; /***************************** F U N C T I O N S 
******************************/ +void* safeMalloc(uint64_t size){ + void * d = malloc(size); + if (d == NULL){ + ERR("Could not malloc an array"); + } + memset(d, 0, size); + return d; +} + +void FailMessage(int rank, const char *location, char *format, ...) { + char msg[4096]; + va_list args; + va_start(args, format); + vsnprintf(msg, 4096, format, args); + va_end(args); + fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", + PrintTimestamp(), rank, location, msg, strerror(errno)); + fflush(out_logfile); + MPI_Abort(testComm, 1); +} + +size_t NodeMemoryStringToBytes(char *size_str) +{ + int percent; + int rc; + long page_size; + long num_pages; + long long mem; + + rc = sscanf(size_str, " %d %% ", &percent); + if (rc == 0) + return (size_t) string_to_bytes(size_str); + if (percent > 100 || percent < 0) + ERR("percentage must be between 0 and 100"); + +#ifdef HAVE_SYSCONF + page_size = sysconf(_SC_PAGESIZE); +#else + page_size = getpagesize(); +#endif + +#ifdef _SC_PHYS_PAGES + num_pages = sysconf(_SC_PHYS_PAGES); + if (num_pages == -1) + ERR("sysconf(_SC_PHYS_PAGES) is not supported"); +#else + ERR("sysconf(_SC_PHYS_PAGES) is not supported"); +#endif + mem = page_size * num_pages; + + return mem / 100 * percent; +} + +void updateParsedOptions(IOR_param_t * options, options_all_t * global_options){ + if (options->setTimeStampSignature){ + options->incompressibleSeed = options->setTimeStampSignature; + } + + if (options->buffer_type && options->buffer_type[0] != 0){ + switch(options->buffer_type[0]) { + case 'i': /* Incompressible */ + options->dataPacketType = incompressible; + break; + case 't': /* timestamp */ + options->dataPacketType = timestamp; + break; + case 'o': /* offset packet */ + options->storeFileOffset = TRUE; + options->dataPacketType = offset; + break; + default: + fprintf(out_logfile, + "Unknown argument for -l %s; generic assumed\n", options->buffer_type); + break; + } + } + if (options->memoryPerNodeStr){ + options->memoryPerNode = 
NodeMemoryStringToBytes(options->memoryPerNodeStr); + } + const ior_aiori_t * backend = aiori_select(options->api); + if (backend == NULL) + ERR_SIMPLE("unrecognized I/O API"); + + options->backend = backend; + /* copy the actual module options into the test */ + options->backend_options = airoi_update_module_options(backend, global_options); + options->apiVersion = backend->get_version(); +} /* Used in aiori-POSIX.c and aiori-PLFS.c */ @@ -136,46 +223,169 @@ void DumpBuffer(void *buffer, return; } /* DumpBuffer() */ +/* a function that prints an int array where each index corresponds to a rank + and the value is whether that rank is on the same host as root. + Also returns 1 if rank 1 is on same host and 0 otherwise +*/ +int QueryNodeMapping(MPI_Comm comm, int print_nodemap) { + char localhost[MAX_PATHLEN], roothost[MAX_PATHLEN]; + int num_ranks; + MPI_Comm_size(comm, &num_ranks); + int *node_map = (int*)malloc(sizeof(int) * num_ranks); + if ( ! node_map ) { + FAIL("malloc"); + } + if (gethostname(localhost, MAX_PATHLEN) != 0) { + FAIL("gethostname()"); + } + if (rank==0) { + strncpy(roothost,localhost,MAX_PATHLEN); + } + + /* have rank 0 broadcast out its hostname */ + MPI_Bcast(roothost, MAX_PATHLEN, MPI_CHAR, 0, comm); + //printf("Rank %d received root host as %s\n", rank, roothost); + /* then every rank figures out whether it is same host as root and then gathers that */ + int same_as_root = strcmp(roothost,localhost) == 0; + MPI_Gather( &same_as_root, 1, MPI_INT, node_map, 1, MPI_INT, 0, comm); + if ( print_nodemap && rank==0) { + fprintf( out_logfile, "Nodemap: " ); + for ( int i = 0; i < num_ranks; i++ ) { + fprintf( out_logfile, "%d", node_map[i] ); + } + fprintf( out_logfile, "\n" ); + } + int ret = 1; + if(num_ranks>1) + ret = node_map[1] == 1; + MPI_Bcast(&ret, 1, MPI_INT, 0, comm); + free(node_map); + return ret; +} + +/* + * There is a more direct way to determine the node count in modern MPI + * versions so we use that if possible. 
+ * + * For older versions we use a method which should still provide accurate + * results even if the total number of tasks is not evenly divisible by the + * tasks on node rank 0. + */ +int GetNumNodes(MPI_Comm comm) { + if (getenv("IOR_FAKE_NODES")){ + int numNodes = atoi(getenv("IOR_FAKE_NODES")); + int rank; + MPI_Comm_rank(comm, & rank); + if(rank == 0){ + printf("Fake number of node: using %d\n", numNodes); + } + return numNodes; + } #if MPI_VERSION >= 3 -int CountTasksPerNode(MPI_Comm comm) { - /* modern MPI provides a simple way to get the local process count */ - MPI_Comm shared_comm; - int count; + MPI_Comm shared_comm; + int shared_rank = 0; + int local_result = 0; + int numNodes = 0; + + MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm), + "MPI_Comm_split_type() error"); + MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error"); + local_result = shared_rank == 0? 1 : 0; + MPI_CHECK(MPI_Allreduce(&local_result, &numNodes, 1, MPI_INT, MPI_SUM, comm), + "MPI_Allreduce() error"); + MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error"); + + return numNodes; +#else + int numTasks = 0; + int numTasksOnNode0 = 0; - MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm); - MPI_Comm_size (shared_comm, &count); - MPI_Comm_free (&shared_comm); + numTasks = GetNumTasks(comm); + numTasksOnNode0 = GetNumTasksOnNode0(comm); - return count; + return ((numTasks - 1) / numTasksOnNode0) + 1; +#endif } -#else + + +int GetNumTasks(MPI_Comm comm) { + int numTasks = 0; + + MPI_CHECK(MPI_Comm_size(comm, &numTasks), "cannot get number of tasks"); + + return numTasks; +} + + /* - * Count the number of tasks that share a host. + * It's very important that this method provide the same result to every + * process as it's used for redistributing which jobs read from which files. + * It was renamed accordingly. 
+ * + * If different nodes get different results from this method then jobs get + * redistributed unevenly and you no longer have a 1:1 relationship with some + * nodes reading multiple files while others read none. + * + * In the common case the number of tasks on each node (MPI_Comm_size on an + * MPI_COMM_TYPE_SHARED communicator) will be the same. However, there is + * nothing which guarantees this. It's valid to have, for example, 64 jobs + * across 4 systems which can run 20 jobs each. In that scenario you end up + * with 3 MPI_COMM_TYPE_SHARED groups of 20, and one group of 4. + * + * In the (MPI_VERSION < 3) implementation of this method consistency is + * ensured by asking specifically about the number of tasks on the node with + * rank 0. In the original implementation for (MPI_VERSION >= 3) this was + * broken by using the LOCAL process count which differed depending on which + * node you were on. + * + * This was corrected below by first splitting the comm into groups by node + * (MPI_COMM_TYPE_SHARED) and then having only the node with world rank 0 and + * shared rank 0 return the MPI_Comm_size of its shared subgroup. This yields + * the original consistent behavior no matter which node asks. * - * This function employees the gethostname() call, rather than using + * In the common case where every node has the same number of tasks this + * method will return the same value it always has. 
+ */ +int GetNumTasksOnNode0(MPI_Comm comm) { + if (getenv("IOR_FAKE_TASK_PER_NODES")){ + int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES")); + int rank; + MPI_Comm_rank(comm, & rank); + if(rank == 0){ + printf("Fake tasks per node: using %d\n", tasksPerNode); + } + return tasksPerNode; + } +#if MPI_VERSION >= 3 + MPI_Comm shared_comm; + int shared_rank = 0; + int tasks_on_node_rank0 = 0; + int local_result = 0; + + MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm), + "MPI_Comm_split_type() error"); + MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error"); + if (rank == 0 && shared_rank == 0) { + MPI_CHECK(MPI_Comm_size(shared_comm, &local_result), "MPI_Comm_size() error"); + } + MPI_CHECK(MPI_Allreduce(&local_result, &tasks_on_node_rank0, 1, MPI_INT, MPI_SUM, comm), + "MPI_Allreduce() error"); + MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error"); + + return tasks_on_node_rank0; +#else +/* + * This version employs the gethostname() call, rather than using * MPI_Get_processor_name(). We are interested in knowing the number * of tasks that share a file system client (I/O node, compute node, * whatever that may be). However on machines like BlueGene/Q, * MPI_Get_processor_name() uniquely identifies a cpu in a compute node, * not the node where the I/O is function shipped to. gethostname() * is assumed to identify the shared filesystem client in more situations. - * - * NOTE: This also assumes that the task count on all nodes is equal - * to the task count on the host running MPI task 0. 
 */ -int CountTasksPerNode(MPI_Comm comm) { int size; MPI_Comm_size(comm, & size); /* for debugging and testing */ - if (getenv("IOR_FAKE_TASK_PER_NODES")){ - int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES")); - int rank; - MPI_Comm_rank(comm, & rank); - if(rank == 0){ - printf("Fake tasks per node: using %d\n", tasksPerNode); - } - return tasksPerNode; - } char localhost[MAX_PATHLEN], hostname[MAX_PATHLEN]; int count = 1, @@ -206,8 +416,8 @@ int CountTasksPerNode(MPI_Comm comm) { MPI_Bcast(&count, 1, MPI_INT, 0, comm); return(count); -} #endif +} /* @@ -215,21 +425,23 @@ int CountTasksPerNode(MPI_Comm comm) { */ void ExtractHint(char *settingVal, char *valueVal, char *hintString) { - char *settingPtr, *valuePtr, *tmpPtr1, *tmpPtr2; + char *settingPtr, *valuePtr, *tmpPtr2; + /* find the value */ settingPtr = (char *)strtok(hintString, " ="); valuePtr = (char *)strtok(NULL, " =\t\r\n"); - tmpPtr1 = settingPtr; - tmpPtr2 = (char *)strstr(settingPtr, "IOR_HINT__MPI__"); - if (tmpPtr1 == tmpPtr2) { + /* is this an MPI hint? */ + tmpPtr2 = (char *) strstr(settingPtr, "IOR_HINT__MPI__"); + if (settingPtr == tmpPtr2) { settingPtr += strlen("IOR_HINT__MPI__"); - } else { - tmpPtr2 = (char *)strstr(settingPtr, "IOR_HINT__GPFS__"); - if (tmpPtr1 == tmpPtr2) { - settingPtr += strlen("IOR_HINT__GPFS__"); - fprintf(out_logfile, - "WARNING: Unable to set GPFS hints (not implemented.)\n"); + tmpPtr2 = (char *) strstr(hintString, "IOR_HINT__GPFS__"); + /* is it a GPFS hint? */ + if (settingPtr == tmpPtr2) { + settingPtr += strlen("IOR_HINT__GPFS__"); + }else{ + fprintf(out_logfile, "WARNING: Unable to set unknown hint type (not implemented.)\n"); + return; + } } strcpy(settingVal, settingPtr); @@ -353,7 +565,7 @@ IOR_offset_t StringToBytes(char *size_str) /* * Displays size of file system and percent of data blocks and inodes used. 
*/ -void ShowFileSystemSize(char *fileSystem) +void ShowFileSystemSize(char *fileSystem) // this might be converted to an AIORI call { #ifndef _WIN32 /* FIXME */ char realPath[PATH_MAX]; @@ -373,11 +585,13 @@ void ShowFileSystemSize(char *fileSystem) #ifdef __sun if (statvfs(fileSystem, &statusBuffer) != 0) { - ERR("unable to statvfs() file system"); + WARN("unable to statvfs() file system"); + return; } #else /* !__sun */ if (statfs(fileSystem, &statusBuffer) != 0) { - ERR("unable to statfs() file system"); + WARN("unable to statfs() file system"); + return; } #endif /* __sun */ @@ -408,7 +622,8 @@ void ShowFileSystemSize(char *fileSystem) /* show results */ if (realpath(fileSystem, realPath) == NULL) { - ERR("unable to use realpath()"); + WARN("unable to use realpath()"); + return; } if(outputFormat == OUTPUT_DEFAULT){ diff --git a/src/utilities.h b/src/utilities.h index 7933cbc4..2a9abe36 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -18,10 +18,8 @@ #include #include "ior.h" -extern int numTasksWorld; extern int rank; extern int rankOffset; -extern int tasksPerNode; extern int verbose; extern MPI_Comm testComm; extern MPI_Comm mpi_comm_world; @@ -36,23 +34,15 @@ extern enum OutputFormat_t outputFormat; /* format of the output */ #ifdef __linux__ -#define FAIL(msg) do { \ - fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \ - PrintTimestamp(), rank, __func__, \ - msg, strerror(errno)); \ - fflush(out_logfile); \ - MPI_Abort(testComm, 1); \ - } while(0) +#define ERROR_LOCATION __func__ #else -#define FAIL(msg) do { \ - fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \ - PrintTimestamp(), rank, __LINE__, \ - msg, strerror(errno)); \ - fflush(out_logfile); \ - MPI_Abort(testComm, 1); \ - } while(0) +#define ERROR_LOCATION __LINE__ #endif +#define FAIL(...) 
FailMessage(rank, ERROR_LOCATION, __VA_ARGS__) +void FailMessage(int rank, const char *location, char *format, ...); + +void* safeMalloc(uint64_t size); void set_o_direct_flag(int *fd); char *CurrentTimeString(void); @@ -63,8 +53,13 @@ void SeedRandGen(MPI_Comm); void SetHints (MPI_Info *, char *); void ShowHints (MPI_Info *); char *HumanReadable(IOR_offset_t value, int base); -int CountTasksPerNode(MPI_Comm comm); +int QueryNodeMapping(MPI_Comm comm, int print_nodemap); +int GetNumNodes(MPI_Comm); +int GetNumTasks(MPI_Comm); +int GetNumTasksOnNode0(MPI_Comm); void DelaySecs(int delay); +void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); +size_t NodeMemoryStringToBytes(char *size_str); /* Returns -1, if cannot be read */ int64_t ReadStoneWallingIterations(char * const filename); diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index 8dd3befe..91dba4b1 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -7,11 +7,14 @@ # Example: export IOR_EXTRA="-v -v -v" ROOT="$(dirname ${BASH_SOURCE[0]})" +TYPE="basic" source $ROOT/test-lib.sh MDTEST 1 -a POSIX MDTEST 2 -a POSIX -W 2 +MDTEST 1 -C -T -r -F -I 1 -z 1 -b 1 -L -u +MDTEST 1 -C -T -I 1 -z 1 -b 1 -u IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 1000k IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 100k @@ -23,4 +26,7 @@ IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k + +IOR 2 -f "$ROOT/test_comments.ior" + END diff --git a/testing/complex-tests.sh b/testing/complex-tests.sh index 951dc32f..c314cf95 100755 --- a/testing/complex-tests.sh +++ b/testing/complex-tests.sh @@ -5,6 +5,7 @@ # Example: export IOR_EXTRA="-v -v -v" ROOT=${0%/*} +TYPE="advanced" source $ROOT/test-lib.sh diff --git a/testing/mdtest-patterns/advanced/3.txt b/testing/mdtest-patterns/advanced/3.txt new file mode 100644 index 00000000..4c459416 --- 
/dev/null +++ b/testing/mdtest-patterns/advanced/3.txt @@ -0,0 +1,95 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... +V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): 
curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... 
+V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... 
+V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/advanced/4.txt b/testing/mdtest-patterns/advanced/4.txt new file mode 100644 index 00000000..5d3b7da0 --- /dev/null +++ b/testing/mdtest-patterns/advanced/4.txt @@ -0,0 +1,52 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... +V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 Line 588 
mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 +V-3: Rank 0 Line 588 mdtest_stat file: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/advanced/5.txt b/testing/mdtest-patterns/advanced/5.txt new file mode 100644 index 00000000..e87ae0a5 --- /dev/null +++ b/testing/mdtest-patterns/advanced/5.txt @@ -0,0 +1,77 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 Line 288 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 Line 588 mdtest_stat dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 Line 288 
create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/0.txt b/testing/mdtest-patterns/basic/0.txt new file mode 100644 index 00000000..ebe0f144 --- /dev/null +++ b/testing/mdtest-patterns/basic/0.txt @@ -0,0 +1,27 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/1.txt b/testing/mdtest-patterns/basic/1.txt new file mode 100644 index 00000000..ebe0f144 --- /dev/null +++ b/testing/mdtest-patterns/basic/1.txt @@ -0,0 +1,27 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... +V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 
Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/2.txt b/testing/mdtest-patterns/basic/2.txt new file mode 100644 index 00000000..77f5c781 --- /dev/null +++ b/testing/mdtest-patterns/basic/2.txt @@ -0,0 +1,29 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1694 i 1 nstride 0 +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 +V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0 +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 310 create_remove_items_helper (non-dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 1158 file_test: rm unique directories path is '/dev/shm/mdest/test-dir.0-0/' +V-3: Rank 0 Line 1754 main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/3.txt b/testing/mdtest-patterns/basic/3.txt new file mode 100644 index 00000000..eafadc1e --- /dev/null +++ b/testing/mdtest-patterns/basic/3.txt @@ -0,0 +1,34 @@ +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1694 i 1 nstride 0 +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' 
+V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 diff --git a/testing/test-lib.sh b/testing/test-lib.sh index 965c92a5..444873d2 100644 --- a/testing/test-lib.sh +++ b/testing/test-lib.sh @@ -12,6 +12,7 @@ IOR_OUT=${IOR_OUT:-./test_logs} IOR_TMP=${IOR_TMP:-/dev/shm} IOR_EXTRA=${IOR_EXTRA:-} # Add global options like verbosity MDTEST_EXTRA=${MDTEST_EXTRA:-} +MDTEST_TEST_PATTERNS=${MDTEST_TEST_PATTERNS:-../testing/mdtest-patterns/$TYPE} ################################################################################ mkdir -p ${IOR_OUT} @@ -54,12 +55,27 @@ function IOR(){ function MDTEST(){ RANKS=$1 shift - WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} ${MDTEST_EXTRA} -d ${IOR_TMP}/mdest" + rm -rf ${IOR_TMP}/mdest + WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} 
${MDTEST_EXTRA} -d ${IOR_TMP}/mdest -V=4" $WHAT 1>"${IOR_OUT}/test_out.$I" 2>&1 if [[ $? != 0 ]]; then echo -n "ERR" ERRORS=$(($ERRORS + 1)) else + # compare basic pattern + if [[ -r ${MDTEST_TEST_PATTERNS}/$I.txt ]] ; then + grep "V-3" "${IOR_OUT}/test_out.$I" > "${IOR_OUT}/tmp" + cmp -s "${IOR_OUT}/tmp" ${MDTEST_TEST_PATTERNS}/$I.txt + if [[ $? != 0 ]]; then + mv "${IOR_OUT}/tmp" ${IOR_OUT}/tmp.$I + echo -n "Pattern differs! check: diff -u ${MDTEST_TEST_PATTERNS}/$I.txt ${IOR_OUT}/tmp.$I " + fi + else + if [[ ! -e ${MDTEST_TEST_PATTERNS} ]] ; then + mkdir -p ${MDTEST_TEST_PATTERNS} + fi + grep "V-3" "${IOR_OUT}/test_out.$I" > ${MDTEST_TEST_PATTERNS}/$I.txt + fi echo -n "OK " fi echo " $WHAT" diff --git a/testing/test_comments.ior b/testing/test_comments.ior new file mode 100644 index 00000000..eaf7997e --- /dev/null +++ b/testing/test_comments.ior @@ -0,0 +1,21 @@ +# test to ensure that leading whitespace is ignored +IOR START +api=posix +writeFile =1 + randomOffset=1 +reorderTasks=1 + filePerProc=1 + keepFile=1 +fsync=1 + repetitions=1 +multiFile=1 + # tab-prefixed comment +transferSize=100k +blockSize=100k + # space-prefixed comment +run +--dummy.delay-create=1000 +useo_direct=0 +#--posix.odirect=0 +api=dummy + ior stop diff --git a/travis-test.sh b/travis-test.sh index dfc12357..7bd84a0d 100755 --- a/travis-test.sh +++ b/travis-test.sh @@ -5,7 +5,7 @@ # # These options will be passed directly to the autoconf configure script -CONFIGURE_OPTS="${CONFIGURE_OPTS:-""}" +CONFIGURE_OPTS="${CONFIGURE_OPTS:-"CFLAGS=-std=c99 --disable-silent-rules"}" BASE_DIR="$(cd "${0%/*}" && pwd)" if [ -z "$BASE_DIR" -o ! -d "$BASE_DIR" ]; then