From bf16b54f870ddebb59aa3025d063f53859708623 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Mon, 9 Dec 2024 00:15:38 -0800 Subject: [PATCH 01/12] go.mod: update btcwallet to latest to eliminate waddrmgr deadlock --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 36766d0d81..f7762e15ad 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 github.com/btcsuite/btclog v0.0.0-20241003133417-09c4e92e319c github.com/btcsuite/btclog/v2 v2.0.0 - github.com/btcsuite/btcwallet v0.16.10-0.20241113134707-b4ff60753aaa + github.com/btcsuite/btcwallet v0.16.10-0.20241127094224-93c858b2ad63 github.com/btcsuite/btcwallet/wallet/txauthor v1.3.5 github.com/btcsuite/btcwallet/wallet/txrules v1.2.2 github.com/btcsuite/btcwallet/walletdb v1.4.4 diff --git a/go.sum b/go.sum index 69c57c20e8..6a3611cf67 100644 --- a/go.sum +++ b/go.sum @@ -95,8 +95,8 @@ github.com/btcsuite/btclog v0.0.0-20241003133417-09c4e92e319c/go.mod h1:w7xnGOhw github.com/btcsuite/btclog/v2 v2.0.0 h1:ZfOBItEeLWfU0voi88K72j8vtxP4/dHhxRFf2bxZkVo= github.com/btcsuite/btclog/v2 v2.0.0/go.mod h1:XItGUfVOxotJL8kkuk2Hj3EVow5KCugXl3wWfQ6K0AE= github.com/btcsuite/btcutil v0.0.0-20190425235716-9e5f4b9a998d/go.mod h1:+5NJ2+qvTyV9exUAL/rxXi3DcLg2Ts+ymUAY5y4NvMg= -github.com/btcsuite/btcwallet v0.16.10-0.20241113134707-b4ff60753aaa h1:x7vYpwkPL5zeJEWPPaRunybH9ERRMGWeNf7x/0aU/38= -github.com/btcsuite/btcwallet v0.16.10-0.20241113134707-b4ff60753aaa/go.mod h1:1HJXYbjJzgumlnxOC2+ViR1U+gnHWoOn7WeK5OfY1eU= +github.com/btcsuite/btcwallet v0.16.10-0.20241127094224-93c858b2ad63 h1:YN+PekOLlLoGxE3P5RJaGgodZD5DDJSU8eXQZVwwCxM= +github.com/btcsuite/btcwallet v0.16.10-0.20241127094224-93c858b2ad63/go.mod h1:1HJXYbjJzgumlnxOC2+ViR1U+gnHWoOn7WeK5OfY1eU= github.com/btcsuite/btcwallet/wallet/txauthor v1.3.5 h1:Rr0njWI3r341nhSPesKQ2JF+ugDSzdPoeckS75SeDZk= github.com/btcsuite/btcwallet/wallet/txauthor v1.3.5/go.mod h1:+tXJ3Ym0nlQc/iHSwW1qzjmPs3ev+UVWMbGgfV1OZqU= github.com/btcsuite/btcwallet/wallet/txrules v1.2.2 h1:YEO+Lx1ZJJAtdRrjuhXjWrYsmAk26wLTlNzxt2q0lhk= From 3cebd370de65945f5f248bf9ac27a553c895a797 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 30 Oct 2024 15:01:59 -0700 Subject: [PATCH 02/12] go.mod: use local kvdb to reapply removal of global postgres lock --- go.mod | 3 +++ go.sum | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index f7762e15ad..082c56a626 100644 --- a/go.mod +++ b/go.mod @@ -203,6 +203,9 @@ replace github.com/ulikunitz/xz => github.com/ulikunitz/xz v0.5.11 // https://deps.dev/advisory/OSV/GO-2021-0053?from=%2Fgo%2Fgithub.com%252Fgogo%252Fprotobuf%2Fv1.3.1 replace github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 +// Use local kvdb package until new version is tagged. +replace github.com/lightningnetwork/lnd/kvdb => ./kvdb + // We want to format raw bytes as hex instead of base64. The forked version // allows us to specify that as an option. replace google.golang.org/protobuf => github.com/lightninglabs/protobuf-go-hex-display v1.30.0-hex-display diff --git a/go.sum b/go.sum index 6a3611cf67..0ebc5fa2f0 100644 --- a/go.sum +++ b/go.sum @@ -460,8 +460,6 @@ github.com/lightningnetwork/lnd/fn/v2 v2.0.2 h1:M7o2lYrh/zCp+lntPB3WP/rWTu5U+4ss github.com/lightningnetwork/lnd/fn/v2 v2.0.2/go.mod h1:TOzwrhjB/Azw1V7aa8t21ufcQmdsQOQMDtxVOQWNl8s= github.com/lightningnetwork/lnd/healthcheck v1.2.6 h1:1sWhqr93GdkWy4+6U7JxBfcyZIE78MhIHTJZfPx7qqI= github.com/lightningnetwork/lnd/healthcheck v1.2.6/go.mod h1:Mu02um4CWY/zdTOvFje7WJgJcHyX2zq/FG3MhOAiGaQ= -github.com/lightningnetwork/lnd/kvdb v1.4.11 h1:fk1HMVFrsVK3xqU7q+JWHRgBltw/a2qIg1E3zazMb/8= -github.com/lightningnetwork/lnd/kvdb v1.4.11/go.mod h1:a5cMDKMjbJA8dD06ZqqnYkmSh5DhEbbG8C1YHM3NN+k= github.com/lightningnetwork/lnd/queue v1.1.1 h1:99ovBlpM9B0FRCGYJo6RSFDlt8/vOkQQZznVb18iNMI= github.com/lightningnetwork/lnd/queue v1.1.1/go.mod h1:7A6nC1Qrm32FHuhx/mi1cieAiBZo5O6l8IBIoQxvkz4= github.com/lightningnetwork/lnd/sqldb v1.0.5 h1:ax5vBPf44tN/uD6C5+hBPBjOJ7cRMrUL+sVOdzmLVt4= From f3bb7095b22875a74c05fcd0fadaa3eb853777c1 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 30 Oct 2024 14:40:23 -0700 Subject: [PATCH 03/12] Reapply "kvdb/postgres: remove global application level lock" This reverts commit 67419a7c0c6410761ee369c1d24aba8641b8e400. --- kvdb/postgres/db.go | 1 - kvdb/sqlbase/db.go | 8 ------- kvdb/sqlbase/readwrite_tx.go | 44 ------------------------------------ 3 files changed, 53 deletions(-) diff --git a/kvdb/postgres/db.go b/kvdb/postgres/db.go index 90ca8324a8..425ba16225 100644 --- a/kvdb/postgres/db.go +++ b/kvdb/postgres/db.go @@ -28,7 +28,6 @@ func newPostgresBackend(ctx context.Context, config *Config, prefix string) ( Schema: "public", TableNamePrefix: prefix, SQLiteCmdReplacements: sqliteCmdReplacements, - WithTxLevelLock: true, } return sqlbase.NewSqlBackend(ctx, cfg) diff --git a/kvdb/sqlbase/db.go b/kvdb/sqlbase/db.go index 221a77bfd6..6ef085712a 100644 --- a/kvdb/sqlbase/db.go +++ b/kvdb/sqlbase/db.go @@ -55,10 +55,6 @@ type Config struct { // commands. Note that the sqlite keywords to be replaced are // case-sensitive. SQLiteCmdReplacements SQLiteCmdReplacements - - // WithTxLevelLock when set will ensure that there is a transaction - // level lock. - WithTxLevelLock bool } // db holds a reference to the sql db connection. @@ -79,10 +75,6 @@ type db struct { // db is the underlying database connection instance. db *sql.DB - // lock is the global write lock that ensures single writer. This is - // only used if cfg.WithTxLevelLock is set. - lock sync.RWMutex - // table is the name of the table that contains the data for all // top-level buckets that have keys that cannot be mapped to a distinct // sql table. diff --git a/kvdb/sqlbase/readwrite_tx.go b/kvdb/sqlbase/readwrite_tx.go index ec761931ad..18a6a682c9 100644 --- a/kvdb/sqlbase/readwrite_tx.go +++ b/kvdb/sqlbase/readwrite_tx.go @@ -5,7 +5,6 @@ package sqlbase import ( "context" "database/sql" - "sync" "github.com/btcsuite/btcwallet/walletdb" ) @@ -20,28 +19,11 @@ type readWriteTx struct { // active is true if the transaction hasn't been committed yet. active bool - - // locker is a pointer to the global db lock. - locker sync.Locker } // newReadWriteTx creates an rw transaction using a connection from the // specified pool. func newReadWriteTx(db *db, readOnly bool) (*readWriteTx, error) { - locker := newNoopLocker() - if db.cfg.WithTxLevelLock { - // Obtain the global lock instance. An alternative here is to - // obtain a database lock from Postgres. Unfortunately there is - // no database-level lock in Postgres, meaning that each table - // would need to be locked individually. Perhaps an advisory - // lock could perform this function too. - locker = &db.lock - if readOnly { - locker = db.lock.RLocker() - } - } - locker.Lock() - // Start the transaction. Don't use the timeout context because it would // be applied to the transaction as a whole. If possible, mark the // transaction as read-only to make sure that potential programming @@ -54,7 +36,6 @@ func newReadWriteTx(db *db, readOnly bool) (*readWriteTx, error) { }, ) if err != nil { - locker.Unlock() return nil, err } @@ -62,7 +43,6 @@ func newReadWriteTx(db *db, readOnly bool) (*readWriteTx, error) { db: db, tx: tx, active: true, - locker: locker, }, nil } @@ -94,7 +74,6 @@ func (tx *readWriteTx) Rollback() error { // Unlock the transaction regardless of the error result. tx.active = false - tx.locker.Unlock() return err } @@ -162,7 +141,6 @@ func (tx *readWriteTx) Commit() error { // Unlock the transaction regardless of the error result. tx.active = false - tx.locker.Unlock() return err } @@ -204,25 +182,3 @@ func (tx *readWriteTx) Exec(query string, args ...interface{}) (sql.Result, return tx.tx.ExecContext(ctx, query, args...) } - -// noopLocker is an implementation of a no-op sync.Locker. -type noopLocker struct{} - -// newNoopLocker creates a new noopLocker. -func newNoopLocker() sync.Locker { - return &noopLocker{} -} - -// Lock is a noop. -// -// NOTE: this is part of the sync.Locker interface. -func (n *noopLocker) Lock() { -} - -// Unlock is a noop. -// -// NOTE: this is part of the sync.Locker interface. -func (n *noopLocker) Unlock() { -} - -var _ sync.Locker = (*noopLocker)(nil) From a0e5a14574e1bf763f55183711604e603f844747 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Thu, 12 Dec 2024 10:38:02 -0800 Subject: [PATCH 04/12] log: add sub-logger for kvdb/sqlbase --- log.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/log.go b/log.go index a3efd03335..ae2d2d811f 100644 --- a/log.go +++ b/log.go @@ -25,6 +25,7 @@ import ( "github.com/lightningnetwork/lnd/healthcheck" "github.com/lightningnetwork/lnd/htlcswitch" "github.com/lightningnetwork/lnd/invoices" + "github.com/lightningnetwork/lnd/kvdb/sqlbase" "github.com/lightningnetwork/lnd/lncfg" "github.com/lightningnetwork/lnd/lnrpc/autopilotrpc" "github.com/lightningnetwork/lnd/lnrpc/chainrpc" @@ -150,6 +151,7 @@ func SetupLoggers(root *build.SubLoggerManager, interceptor signal.Interceptor) AddSubLogger(root, "DISC", interceptor, discovery.UseLogger) AddSubLogger(root, "NTFN", interceptor, chainntnfs.UseLogger) AddSubLogger(root, "CHDB", interceptor, channeldb.UseLogger) + AddSubLogger(root, "SQLB", interceptor, sqlbase.UseLogger) AddSubLogger(root, "HSWC", interceptor, htlcswitch.UseLogger) AddSubLogger(root, "CNCT", interceptor, contractcourt.UseLogger) AddSubLogger(root, "UTXN", interceptor, contractcourt.UseNurseryLogger) From 211dd2108227d2512b068da9498cc96915d15182 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Mon, 18 Nov 2024 15:28:20 -0800 Subject: [PATCH 05/12] itest: fix flake in multi-hop payments To make this itest work reliably with multiple parallel SQL transactions, we need to count both the settle and final HTLC events. Otherwise, sometimes the final events from earlier forwards are counted before the forward events from later forwards, causing a miscount of the settle events. If we expect both the settle and final event for each forward, we don't miscount. --- itest/lnd_multi-hop-payments_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/itest/lnd_multi-hop-payments_test.go b/itest/lnd_multi-hop-payments_test.go index ba7797d7fc..add520e10d 100644 --- a/itest/lnd_multi-hop-payments_test.go +++ b/itest/lnd_multi-hop-payments_test.go @@ -221,11 +221,11 @@ func testMultiHopPayments(ht *lntest.HarnessTest) { // Dave and Alice should both have forwards and settles for // their role as forwarding nodes. ht.AssertHtlcEvents( - daveEvents, numPayments, 0, numPayments, 0, + daveEvents, numPayments, 0, numPayments*2, 0, routerrpc.HtlcEvent_FORWARD, ) ht.AssertHtlcEvents( - aliceEvents, numPayments, 0, numPayments, 0, + aliceEvents, numPayments, 0, numPayments*2, 0, routerrpc.HtlcEvent_FORWARD, ) From c29fb81d1b7e46af0a5e8c2b37b8221cf1a91f94 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Fri, 1 Nov 2024 19:50:05 -0700 Subject: [PATCH 06/12] batch: handle serialization errors correctly --- batch/batch.go | 17 +++++++++-- batch/batch_test.go | 74 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 batch/batch_test.go diff --git a/batch/batch.go b/batch/batch.go index fcc691582a..9f4842c655 100644 --- a/batch/batch.go +++ b/batch/batch.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/lightningnetwork/lnd/kvdb" + "github.com/lightningnetwork/lnd/sqldb" ) // errSolo is a sentinel error indicating that the requester should re-run the @@ -55,8 +56,20 @@ func (b *batch) run() { for i, req := range b.reqs { err := req.Update(tx) if err != nil { - failIdx = i - return err + // If we get a serialization error, we + // want the underlying SQL retry + // mechanism to retry the entire batch. + // Otherwise, we can succeed in an + // sqldb retry and still re-execute the + // failing request individually. + dbErr := sqldb.MapSQLError(err) + if !sqldb.IsSerializationError(dbErr) { + failIdx = i + + return err + } + + return dbErr } } return nil diff --git a/batch/batch_test.go b/batch/batch_test.go new file mode 100644 index 0000000000..fef2c55979 --- /dev/null +++ b/batch/batch_test.go @@ -0,0 +1,74 @@ +package batch + +import ( + "errors" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/btcsuite/btcwallet/walletdb" + "github.com/lightningnetwork/lnd/kvdb" + "github.com/stretchr/testify/require" +) + +func TestRetry(t *testing.T) { + dbDir := t.TempDir() + + dbName := filepath.Join(dbDir, "weks.db") + db, err := walletdb.Create( + "bdb", dbName, true, kvdb.DefaultDBTimeout, + ) + if err != nil { + t.Fatalf("unable to create walletdb: %v", err) + } + t.Cleanup(func() { + db.Close() + }) + + var ( + mu sync.Mutex + called int + ) + sched := NewTimeScheduler(db, &mu, time.Second) + + // First, we construct a request that should retry individually and + // execute it non-lazily. It should still return the error the second + // time. + req := &Request{ + Update: func(tx kvdb.RwTx) error { + called++ + + return errors.New("test") + }, + } + err = sched.Execute(req) + + // Check and reset the called counter. + mu.Lock() + require.Equal(t, 2, called) + called = 0 + mu.Unlock() + + require.ErrorContains(t, err, "test") + + // Now, we construct a request that should NOT retry because it returns + // a serialization error, which should cause the underlying postgres + // transaction to retry. Since we aren't using postgres, this will + // cause the transaction to not be retried at all. + req = &Request{ + Update: func(tx kvdb.RwTx) error { + called++ + + return errors.New("could not serialize access") + }, + } + err = sched.Execute(req) + + // Check the called counter. + mu.Lock() + require.Equal(t, 1, called) + mu.Unlock() + + require.ErrorContains(t, err, "could not serialize access") +} From c01dcc2be78297052a9792d1f00b7f6f609c2f16 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Mon, 4 Nov 2024 19:15:35 -0800 Subject: [PATCH 07/12] graph/db: handle previously-unhandled errors --- graph/db/graph.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/graph/db/graph.go b/graph/db/graph.go index a6b23e64b3..7608964364 100644 --- a/graph/db/graph.go +++ b/graph/db/graph.go @@ -1127,7 +1127,7 @@ func (c *ChannelGraph) addChannelEdge(tx kvdb.RwTx, err := addLightningNode(tx, &node1Shell) if err != nil { return fmt.Errorf("unable to create shell node "+ - "for: %x", edge.NodeKey1Bytes) + "for: %x: %w", edge.NodeKey1Bytes, err) } case node1Err != nil: return err @@ -1143,7 +1143,7 @@ func (c *ChannelGraph) addChannelEdge(tx kvdb.RwTx, err := addLightningNode(tx, &node2Shell) if err != nil { return fmt.Errorf("unable to create shell node "+ - "for: %x", edge.NodeKey2Bytes) + "for: %x: %w", edge.NodeKey2Bytes, err) } case node2Err != nil: return err @@ -2692,8 +2692,14 @@ func (c *ChannelGraph) delChannelEdgeUnsafe(edges, edgeIndex, chanIndex, // As part of deleting the edge we also remove all disabled entries // from the edgePolicyDisabledIndex bucket. We do that for both // directions. - updateEdgePolicyDisabledIndex(edges, cid, false, false) - updateEdgePolicyDisabledIndex(edges, cid, true, false) + err = updateEdgePolicyDisabledIndex(edges, cid, false, false) + if err != nil { + return err + } + err = updateEdgePolicyDisabledIndex(edges, cid, true, false) + if err != nil { + return err + } // With the edge data deleted, we can purge the information from the two // edge indexes. @@ -4410,11 +4416,14 @@ func putChanEdgePolicy(edges kvdb.RwBucket, edge *models.ChannelEdgePolicy, return err } - updateEdgePolicyDisabledIndex( + err = updateEdgePolicyDisabledIndex( edges, edge.ChannelID, edge.ChannelFlags&lnwire.ChanUpdateDirection > 0, edge.IsDisabled(), ) + if err != nil { + return err + } return edges.Put(edgeKey[:], b.Bytes()[:]) } From 780c271b8091ec9f1866e2011433e3892fbdb36f Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 6 Nov 2024 11:29:47 -0800 Subject: [PATCH 08/12] sqldb: improve serialization error handling --- go.mod | 3 +++ go.sum | 2 -- kvdb/go.mod | 6 +++++- kvdb/go.sum | 14 ++++++-------- sqldb/interfaces.go | 2 +- sqldb/sqlerrors.go | 33 +++++++++++++++++++++++++++++---- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/go.mod b/go.mod index 082c56a626..ebef0f40eb 100644 --- a/go.mod +++ b/go.mod @@ -206,6 +206,9 @@ replace github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 // Use local kvdb package until new version is tagged. replace github.com/lightningnetwork/lnd/kvdb => ./kvdb +// Use local sqldb package until new version is tagged. +replace github.com/lightningnetwork/lnd/sqldb => ./sqldb + // We want to format raw bytes as hex instead of base64. The forked version // allows us to specify that as an option. replace google.golang.org/protobuf => github.com/lightninglabs/protobuf-go-hex-display v1.30.0-hex-display diff --git a/go.sum b/go.sum index 0ebc5fa2f0..be152b0618 100644 --- a/go.sum +++ b/go.sum @@ -462,8 +462,6 @@ github.com/lightningnetwork/lnd/healthcheck v1.2.6 h1:1sWhqr93GdkWy4+6U7JxBfcyZI github.com/lightningnetwork/lnd/healthcheck v1.2.6/go.mod h1:Mu02um4CWY/zdTOvFje7WJgJcHyX2zq/FG3MhOAiGaQ= github.com/lightningnetwork/lnd/queue v1.1.1 h1:99ovBlpM9B0FRCGYJo6RSFDlt8/vOkQQZznVb18iNMI= github.com/lightningnetwork/lnd/queue v1.1.1/go.mod h1:7A6nC1Qrm32FHuhx/mi1cieAiBZo5O6l8IBIoQxvkz4= -github.com/lightningnetwork/lnd/sqldb v1.0.5 h1:ax5vBPf44tN/uD6C5+hBPBjOJ7cRMrUL+sVOdzmLVt4= -github.com/lightningnetwork/lnd/sqldb v1.0.5/go.mod h1:OG09zL/PHPaBJefp4HsPz2YLUJ+zIQHbpgCtLnOx8I4= github.com/lightningnetwork/lnd/ticker v1.1.1 h1:J/b6N2hibFtC7JLV77ULQp++QLtCwT6ijJlbdiZFbSM= github.com/lightningnetwork/lnd/ticker v1.1.1/go.mod h1:waPTRAAcwtu7Ji3+3k+u/xH5GHovTsCoSVpho0KDvdA= github.com/lightningnetwork/lnd/tlv v1.3.0 h1:exS/KCPEgpOgviIttfiXAPaUqw2rHQrnUOpP7HPBPiY= diff --git a/kvdb/go.mod b/kvdb/go.mod index 3dc4281642..f1faf2a293 100644 --- a/kvdb/go.mod +++ b/kvdb/go.mod @@ -16,7 +16,7 @@ require ( go.etcd.io/etcd/client/v3 v3.5.7 go.etcd.io/etcd/server/v3 v3.5.7 golang.org/x/net v0.22.0 - modernc.org/sqlite v1.29.8 + modernc.org/sqlite v1.29.10 ) require ( @@ -59,6 +59,7 @@ require ( github.com/jackc/pgproto3/v2 v2.3.3 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect github.com/jackc/pgtype v1.14.0 // indirect + github.com/jackc/pgx/v5 v5.3.1 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/json-iterator/go v1.1.11 // indirect github.com/lib/pq v1.10.9 // indirect @@ -135,6 +136,9 @@ replace github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompat // This replace is for https://github.com/advisories/GHSA-25xm-hr59-7c27 replace github.com/ulikunitz/xz => github.com/ulikunitz/xz v0.5.11 +// Use local sqldb package until new version is tagged. +replace github.com/lightningnetwork/lnd/sqldb => ../sqldb + // This replace is for // https://deps.dev/advisory/OSV/GO-2021-0053?from=%2Fgo%2Fgithub.com%252Fgogo%252Fprotobuf%2Fv1.3.1 replace github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 diff --git a/kvdb/go.sum b/kvdb/go.sum index 329dd2497c..ff51e77dd9 100644 --- a/kvdb/go.sum +++ b/kvdb/go.sum @@ -257,6 +257,8 @@ github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQ github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs= github.com/jackc/pgx/v4 v4.18.1 h1:YP7G1KABtKpB5IHrO9vYwSrCOhs7p3uqhvhhQBptya0= github.com/jackc/pgx/v4 v4.18.1/go.mod h1:FydWkUyadDmdNH/mHnGob881GawxeEm7TcMCzkb+qQE= +github.com/jackc/pgx/v5 v5.3.1 h1:Fcr8QJ1ZeLi5zsPZqQeUZhNhxfkkKBOgJuYkJHoBOtU= +github.com/jackc/pgx/v5 v5.3.1/go.mod h1:t3JDKnCBlYIc0ewLF0Q7B8MXmoIaBOZj/ic7iHozM/8= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= @@ -297,8 +299,6 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lightningnetwork/lnd/healthcheck v1.2.4 h1:lLPLac+p/TllByxGSlkCwkJlkddqMP5UCoawCj3mgFQ= github.com/lightningnetwork/lnd/healthcheck v1.2.4/go.mod h1:G7Tst2tVvWo7cx6mSBEToQC5L1XOGxzZTPB29g9Rv2I= -github.com/lightningnetwork/lnd/sqldb v1.0.2 h1:PfuYzScYMD9/QonKo/QvgsbXfTnH5DfldIimkfdW4Bk= -github.com/lightningnetwork/lnd/sqldb v1.0.2/go.mod h1:V2Xl6JNWLTKE97WJnwfs0d0TYJdIQTqK8/3aAwkd3qI= github.com/lightningnetwork/lnd/ticker v1.1.0 h1:ShoBiRP3pIxZHaETndfQ5kEe+S4NdAY1hiX7YbZ4QE4= github.com/lightningnetwork/lnd/ticker v1.1.0/go.mod h1:ubqbSVCn6RlE0LazXuBr7/Zi6QT0uQo++OgIRBxQUrk= github.com/lightningnetwork/lnd/tor v1.0.0 h1:wvEc7I+Y7IOtPglVP3cVBbYhiVhc7uTd7cMF9gQRzwA= @@ -310,8 +310,6 @@ github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= -github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.1.43 h1:JKfpVSCB84vrAmHzyrsxB5NAr5kLoMXZArPSw7Qlgyg= @@ -383,8 +381,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= -github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= @@ -738,8 +736,8 @@ modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc= modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss= -modernc.org/sqlite v1.29.8 h1:nGKglNx9K5v0As+zF0/Gcl1kMkmaU1XynYyq92PbsC8= -modernc.org/sqlite v1.29.8/go.mod h1:lQPm27iqa4UNZpmr4Aor0MH0HkCLbt1huYDfWylLZFk= +modernc.org/sqlite v1.29.10 h1:3u93dz83myFnMilBGCOLbr+HjklS6+5rJLx4q86RDAg= +modernc.org/sqlite v1.29.10/go.mod h1:ItX2a1OVGgNsFh6Dv60JQvGfJfTPHPVpV6DF59akYOA= modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= diff --git a/sqldb/interfaces.go b/sqldb/interfaces.go index f81799c577..3c042aa5a7 100644 --- a/sqldb/interfaces.go +++ b/sqldb/interfaces.go @@ -22,7 +22,7 @@ const ( // DefaultNumTxRetries is the default number of times we'll retry a // transaction if it fails with an error that permits transaction // repetition. - DefaultNumTxRetries = 10 + DefaultNumTxRetries = 20 // DefaultRetryDelay is the default delay between retries. This will be // used to generate a random delay between 0 and this value. diff --git a/sqldb/sqlerrors.go b/sqldb/sqlerrors.go index 6cc1b1cf87..59729910ee 100644 --- a/sqldb/sqlerrors.go +++ b/sqldb/sqlerrors.go @@ -17,6 +17,16 @@ var ( // ErrRetriesExceeded is returned when a transaction is retried more // than the max allowed valued without a success. ErrRetriesExceeded = errors.New("db tx retries exceeded") + + // postgresErrMsgs are strings that signify retriable errors resulting + // from serialization failures. + postgresErrMsgs = []string{ + "could not serialize access", + "current transaction is aborted", + "not enough elements in RWConflictPool", + "deadlock detected", + "commit unexpectedly resulted in rollback", + } ) // MapSQLError attempts to interpret a given error as a database agnostic SQL @@ -41,10 +51,11 @@ func MapSQLError(err error) error { // Sometimes the error won't be properly wrapped, so we'll need to // inspect raw error itself to detect something we can wrap properly. // This handles a postgres variant of the error. - const postgresErrMsg = "could not serialize access" - if strings.Contains(err.Error(), postgresErrMsg) { - return &ErrSerializationError{ - DBError: err, + for _, postgresErrMsg := range postgresErrMsgs { + if strings.Contains(err.Error(), postgresErrMsg) { + return &ErrSerializationError{ + DBError: err, + } } } @@ -105,6 +116,20 @@ func parsePostgresError(pqErr *pgconn.PgError) error { DBError: pqErr, } + // In failed SQL transaction because we didn't catch a previous + // serialization error, so return this one as a serialization error. + case pgerrcode.InFailedSQLTransaction: + return &ErrSerializationError{ + DBError: pqErr, + } + + // Deadlock detedted because of a serialization error, so return this + // one as a serialization error. + case pgerrcode.DeadlockDetected: + return &ErrSerializationError{ + DBError: pqErr, + } + default: return fmt.Errorf("unknown postgres error: %w", pqErr) } From 68e9aea40d86e6d835906803b0abdf826c4ebfee Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 6 Nov 2024 10:21:34 -0800 Subject: [PATCH 09/12] Makefile: tune params for db-instance for postgres itests --- Makefile | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ee9b1de51f..9be920f5dc 100644 --- a/Makefile +++ b/Makefile @@ -195,9 +195,18 @@ ifeq ($(dbbackend),postgres) docker rm lnd-postgres --force || echo "Starting new postgres container" # Start a fresh postgres instance. Allow a maximum of 500 connections so - # that multiple lnd instances with a maximum number of connections of 50 - # each can run concurrently. - docker run --name lnd-postgres -e POSTGRES_PASSWORD=postgres -p 6432:5432 -d postgres:13-alpine -N 500 + # that multiple lnd instances with a maximum number of connections of 20 + # each can run concurrently. Note that many of the settings here are + # specifically for integration testing and are not fit for running + # production nodes. The increase in max connections ensures that there + # are enough entries allocated for the RWConflictPool to allow multiple + # conflicting transactions to track serialization conflicts. The + # increase in predicate locks and locks per transaction is to allow the + # queries to lock individual rows instead of entire tables, helping + # reduce serialization conflicts. Disabling sequential scan for small + # tables also helps prevent serialization conflicts by ensuring lookups + # lock only relevant rows in the index rather than the entire table. + docker run --name lnd-postgres -e POSTGRES_PASSWORD=postgres -p 6432:5432 -d postgres:13-alpine -N 1500 -c max_pred_locks_per_transaction=1024 -c max_locks_per_transaction=128 -c enable_seqscan=off docker logs -f lnd-postgres & # Wait for the instance to be started. From c5d0976375089209383161515b860bb352b882b9 Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Thu, 14 Nov 2024 21:48:14 -0800 Subject: [PATCH 10/12] Makefile: log to file instead of console --- Makefile | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 9be920f5dc..6b28bcc491 100644 --- a/Makefile +++ b/Makefile @@ -207,16 +207,19 @@ ifeq ($(dbbackend),postgres) # tables also helps prevent serialization conflicts by ensuring lookups # lock only relevant rows in the index rather than the entire table. docker run --name lnd-postgres -e POSTGRES_PASSWORD=postgres -p 6432:5432 -d postgres:13-alpine -N 1500 -c max_pred_locks_per_transaction=1024 -c max_locks_per_transaction=128 -c enable_seqscan=off - docker logs -f lnd-postgres & + docker logs -f lnd-postgres >itest/postgres.log 2>&1 & # Wait for the instance to be started. sleep $(POSTGRES_START_DELAY) endif +clean-itest-logs: + rm -rf itest/*.log itest/.logs-* + #? itest-only: Only run integration tests without re-building binaries -itest-only: db-instance +itest-only: clean-itest-logs db-instance @$(call print, "Running integration tests with ${backend} backend.") - rm -rf itest/*.log itest/.logs-*; date + date EXEC_SUFFIX=$(EXEC_SUFFIX) scripts/itest_part.sh 0 1 $(TEST_FLAGS) $(ITEST_FLAGS) -test.v $(COLLECT_ITEST_COVERAGE) @@ -227,9 +230,9 @@ itest: build-itest itest-only itest-race: build-itest-race itest-only #? itest-parallel: Build and run integration tests in parallel mode, running up to ITEST_PARALLELISM test tranches in parallel (default 4) -itest-parallel: build-itest db-instance +itest-parallel: clean-itest-logs build-itest db-instance @$(call print, "Running tests") - rm -rf itest/*.log itest/.logs-*; date + date EXEC_SUFFIX=$(EXEC_SUFFIX) scripts/itest_parallel.sh $(ITEST_PARALLELISM) $(NUM_ITEST_TRANCHES) $(TEST_FLAGS) $(ITEST_FLAGS) $(COLLECT_ITEST_COVERAGE) From 53d34d2c609892b4e402db42fd23fe0b1586bb1d Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Fri, 15 Nov 2024 01:41:22 -0800 Subject: [PATCH 11/12] github workflow: save postgres log to zip file --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b12c5f962e..274f251bd0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -305,7 +305,7 @@ jobs: - name: Zip log files on failure if: ${{ failure() }} timeout-minutes: 5 # timeout after 5 minute - run: 7z a logs-itest-${{ matrix.name }}.zip itest/**/*.log + run: 7z a logs-itest-${{ matrix.name }}.zip itest/**/*.log itest/postgres.log - name: Upload log files on failure uses: actions/upload-artifact@v3 From c9d217b1443765caa4e326519def255dff69738d Mon Sep 17 00:00:00 2001 From: Alex Akselrod Date: Wed, 27 Nov 2024 14:05:00 -0800 Subject: [PATCH 12/12] docs: update release-notes for 0.19.0 --- docs/release-notes/release-notes-0.19.0.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/release-notes/release-notes-0.19.0.md b/docs/release-notes/release-notes-0.19.0.md index 0b576fc7de..5c9ee1e249 100644 --- a/docs/release-notes/release-notes-0.19.0.md +++ b/docs/release-notes/release-notes-0.19.0.md @@ -216,6 +216,11 @@ The underlying functionality between those two options remain the same. store](https://github.com/lightningnetwork/lnd/pull/9001) so that results are namespaced. All existing results are written to the "default" namespace. +* [Remove global application level lock for + Postgres](https://github.com/lightningnetwork/lnd/pull/9242) so multiple DB + transactions can run at once, increasing efficiency. Includes several bugfixes + to allow this to work properly. + ## Code Health * A code refactor that [moves all the graph related DB code out of the @@ -239,6 +244,7 @@ The underlying functionality between those two options remain the same. # Contributors (Alphabetical Order) * Abdullahi Yunus +* Alex Akselrod * Animesh Bilthare * Boris Nagaev * Carla Kirk-Cohen