diff --git a/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.cc b/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.cc index 28815b77..cb9b2d7f 100644 --- a/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.cc +++ b/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.cc @@ -11,8 +11,10 @@ using namespace XrdCephBuffer; using myclock = std::chrono::steady_clock; //using myseconds = std::chrono::duration(end-start); diff --git a/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.hh b/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.hh index 3c7011ef..f893bb02 100644 --- a/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.hh +++ b/src/XrdCeph/XrdCephBuffers/CephIOAdapterRaw.hh @@ -27,7 +27,8 @@ namespace XrdCephBuffer { */ class CephIOAdapterRaw: public virtual ICephIOAdapter { public: - CephIOAdapterRaw(IXrdCephBufferData * bufferdata, int fd); + CephIOAdapterRaw(IXrdCephBufferData * bufferdata, int fd, + bool useStriperlessReads); virtual ~CephIOAdapterRaw(); /** @@ -57,6 +58,7 @@ class CephIOAdapterRaw: public virtual ICephIOAdapter { private: IXrdCephBufferData * m_bufferdata; //!< no ownership of pointer (consider shared ptrs, etc) int m_fd; + bool m_useStriperlessReads {true}; //!< use the striperless read code // timer and counter info std::atomic< long> m_stats_read_timer{0}, m_stats_write_timer{0}; diff --git a/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.cc b/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.cc index 1485ff8f..894034e6 100644 --- a/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.cc +++ b/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.cc @@ -18,8 +18,10 @@ using namespace XrdCephBuffer; XrdCephBufferAlgSimple::XrdCephBufferAlgSimple(std::unique_ptr buffer, - std::unique_ptr cephio, int fd ): -m_bufferdata(std::move(buffer)), m_cephio(std::move(cephio)), m_fd(fd){ + std::unique_ptr cephio, int fd, + bool useStriperlessReads): +m_bufferdata(std::move(buffer)), m_cephio(std::move(cephio)), m_fd(fd), +m_useStriperlessReads(useStriperlessReads) { } @@ -111,7 +113,8 @@ ssize_t XrdCephBufferAlgSimple::read(volatile void *buf, off_t offset, size_t m_bufferdata->invalidate(); m_bufferLength =0; // ensure cached data is set to zero length // #FIXME JW: const_cast is probably a bit poor. - ssize_t rc = ceph_posix_pread(m_fd, const_cast(buf), blen, offset); + + ssize_t rc = ceph_posix_maybestriper_pread (m_fd, const_cast(buf), blen, offset, m_useStriperlessReads); if (rc > 0) { m_stats_bytes_fromceph += rc; m_stats_bytes_toclient += rc; diff --git a/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.hh b/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.hh index fdd0a222..e96bd401 100644 --- a/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.hh +++ b/src/XrdCeph/XrdCephBuffers/XrdCephBufferAlgSimple.hh @@ -26,7 +26,8 @@ namespace XrdCephBuffer { class XrdCephBufferAlgSimple : public virtual IXrdCephBufferAlg { public: - XrdCephBufferAlgSimple(std::unique_ptr buffer, std::unique_ptr cephio, int fd ); + XrdCephBufferAlgSimple(std::unique_ptr buffer, std::unique_ptr cephio, int fd, + bool useStriperlessReads = true ); virtual ~XrdCephBufferAlgSimple(); virtual ssize_t read_aio (XrdSfsAio *aoip) override; @@ -49,6 +50,7 @@ class XrdCephBufferAlgSimple : public virtual IXrdCephBufferAlg { std::unique_ptr m_bufferdata; //! this algorithm takes ownership of the buffer, and will delete it on destruction std::unique_ptr m_cephio ; // no ownership is taken here int m_fd = -1; + bool m_useStriperlessReads {true}; off_t m_bufferStartingOffset = 0; size_t m_bufferLength = 0; diff --git a/src/XrdCeph/XrdCephOssBufferedFile.cc b/src/XrdCeph/XrdCephOssBufferedFile.cc index 1b230317..83476cb3 100644 --- a/src/XrdCeph/XrdCephOssBufferedFile.cc +++ b/src/XrdCeph/XrdCephOssBufferedFile.cc @@ -327,7 +327,8 @@ std::unique_ptr XrdCephOssBufferedFile::create if (m_bufferIOmode == "aio") { cephio = std::unique_ptr(new CephIOAdapterAIORaw(cephbuffer.get(),m_fd)); } else if (m_bufferIOmode == "io") { - cephio = std::unique_ptr(new CephIOAdapterRaw(cephbuffer.get(),m_fd)); + cephio = std::unique_ptr(new CephIOAdapterRaw(cephbuffer.get(),m_fd, + !m_cephoss->m_useDefaultPreadAlg)); } else { BUFLOG("XrdCephOssBufferedFile: buffer mode needs to be one of aio|io " ); m_xrdOssDF->Close(); diff --git a/src/XrdCeph/XrdCephPosix.cc b/src/XrdCeph/XrdCephPosix.cc index 01bd213a..5640b9f7 100644 --- a/src/XrdCeph/XrdCephPosix.cc +++ b/src/XrdCeph/XrdCephPosix.cc @@ -1089,6 +1089,26 @@ ssize_t ceph_posix_pread(int fd, void *buf, size_t count, off64_t offset) { } } +ssize_t ceph_posix_maybestriper_pread(int fd, void *buf, size_t count, off64_t offset, bool allowStriper) { + ssize_t rc {0}; + if (!allowStriper) { + rc = ceph_posix_pread(fd,buf,count,offset); + return rc; + } + rc = ceph_posix_nonstriper_pread(fd, buf, count,offset); + if (-ENOENT == rc || -ENOTSUP == rc) { + //This might be a sparse file or nbstripes > 1, so let's try striper read + rc = ceph_posix_pread(fd, buf, count,offset); + if (rc >= 0) { + char err_str[100]; //99 symbols should be enough for the short message + snprintf(err_str, 100, "WARNING! The file (fd %d) seem to be sparse, this is not expected", fd); + logwrapper(err_str); + } + } + return rc; +} + + static void ceph_aio_read_complete(rados_completion_t c, void *arg) { AioArgs *awa = reinterpret_cast(arg); size_t rc = rados_aio_get_return_value(c); diff --git a/src/XrdCeph/XrdCephPosix.hh b/src/XrdCeph/XrdCephPosix.hh index 9b9c2e0f..25a7ea01 100644 --- a/src/XrdCeph/XrdCephPosix.hh +++ b/src/XrdCeph/XrdCephPosix.hh @@ -69,6 +69,8 @@ ssize_t ceph_striper_readv(int fd, XrdOucIOVec *readV, int n); ssize_t ceph_posix_read(int fd, void *buf, size_t count); ssize_t ceph_posix_nonstriper_pread(int fd, void *buf, size_t count, off64_t offset); ssize_t ceph_posix_pread(int fd, void *buf, size_t count, off64_t offset); +ssize_t ceph_posix_maybestriper_pread(int fd, void *buf, size_t count, off64_t offset, bool allowStriper=true); + ssize_t ceph_aio_read(int fd, XrdSfsAio *aiop, AioCB *cb); int ceph_posix_fstat(int fd, struct stat *buf); int ceph_posix_stat(XrdOucEnv* env, const char *pathname, struct stat *buf);