Skip to content

Commit a15e763

Browse files
ndyakovCopilotboekkooi-impossiblecloudPika-Gopher
authored
fix(pool): Pool ReAuth should not interfere with handoff (#3547)
* fix(pool): wip, pool reauth should not interfere with handoff * fix credListeners map * fix race in tests * better conn usable timeout * add design decision comment * few small improvements * update marked as queued * add Used to clarify the state of the conn * rename test * fix(test): fix flaky test * lock inside the listeners collection * address pr comments * Update internal/auth/cred_listeners.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update internal/pool/buffer_size_test.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * wip refactor entraid * fix maintnotif pool hook * fix mocks * fix nil listener * sync and async reauth based on conn lifecycle * be able to reject connection OnGet * pass hooks so the tests can observe reauth * give some time for the background to execute commands * fix tests * only async reauth * Update internal/pool/pool.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update internal/auth/streaming/pool_hook.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update internal/pool/conn.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * chore(redisotel): use metric.WithAttributeSet to avoid copy (#3552) In order to improve performance replace `WithAttributes` with `WithAttributeSet`. This avoids the slice allocation and copy that is done in `WithAttributes`. For more information see https://github.com/open-telemetry/opentelemetry-go/blob/v1.38.0/metric/instrument.go#L357-L376 * chore(docs): explain why MaxRetries is disabled for ClusterClient (#3551) Co-authored-by: Nedyalko Dyakov <1547186+ndyakov@users.noreply.github.com> * exponential backoff * address pr comments * address pr comments * remove rlock * add some comments * add comments --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Warnar Boekkooi <wboekkooi@impossiblecloud.com> Co-authored-by: Justin <justindsouza80@gmail.com>
1 parent 14a8814 commit a15e763

23 files changed

+1138
-143
lines changed

auth/reauth_credentials_listener.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,4 @@ func NewReAuthCredentialsListener(reAuth func(credentials Credentials) error, on
4444
}
4545

4646
// Ensure ReAuthCredentialsListener implements the CredentialsListener interface.
47-
var _ CredentialsListener = (*ReAuthCredentialsListener)(nil)
47+
var _ CredentialsListener = (*ReAuthCredentialsListener)(nil)

error.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,12 @@ func isRedisError(err error) bool {
108108

109109
func isBadConn(err error, allowTimeout bool, addr string) bool {
110110
switch err {
111-
case nil:
112-
return false
113-
case context.Canceled, context.DeadlineExceeded:
114-
return true
111+
case nil:
112+
return false
113+
case context.Canceled, context.DeadlineExceeded:
114+
return true
115+
case pool.ErrConnUnusableTimeout:
116+
return true
115117
}
116118

117119
if isRedisError(err) {
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package streaming
2+
3+
import (
4+
"github.com/redis/go-redis/v9/auth"
5+
"github.com/redis/go-redis/v9/internal/pool"
6+
)
7+
8+
// ConnReAuthCredentialsListener is a credentials listener for a specific connection
9+
// that triggers re-authentication when credentials change.
10+
//
11+
// This listener implements the auth.CredentialsListener interface and is subscribed
12+
// to a StreamingCredentialsProvider. When new credentials are received via OnNext,
13+
// it marks the connection for re-authentication through the manager.
14+
//
15+
// The re-authentication is always performed asynchronously to avoid blocking the
16+
// credentials provider and to prevent potential deadlocks with the pool semaphore.
17+
// The actual re-auth happens when the connection is returned to the pool in an idle state.
18+
//
19+
// Lifecycle:
20+
// - Created during connection initialization via Manager.Listener()
21+
// - Subscribed to the StreamingCredentialsProvider
22+
// - Receives credential updates via OnNext()
23+
// - Cleaned up when connection is removed from pool via Manager.RemoveListener()
24+
type ConnReAuthCredentialsListener struct {
25+
// reAuth is the function to re-authenticate the connection with new credentials
26+
reAuth func(conn *pool.Conn, credentials auth.Credentials) error
27+
28+
// onErr is the function to call when re-authentication or acquisition fails
29+
onErr func(conn *pool.Conn, err error)
30+
31+
// conn is the connection this listener is associated with
32+
conn *pool.Conn
33+
34+
// manager is the streaming credentials manager for coordinating re-auth
35+
manager *Manager
36+
}
37+
38+
// OnNext is called when new credentials are received from the StreamingCredentialsProvider.
39+
//
40+
// This method marks the connection for asynchronous re-authentication. The actual
41+
// re-authentication happens in the background when the connection is returned to the
42+
// pool and is in an idle state.
43+
//
44+
// Asynchronous re-auth is used to:
45+
// - Avoid blocking the credentials provider's notification goroutine
46+
// - Prevent deadlocks with the pool's semaphore (especially with small pool sizes)
47+
// - Ensure re-auth happens when the connection is safe to use (not processing commands)
48+
//
49+
// The reAuthFn callback receives:
50+
// - nil if the connection was successfully acquired for re-auth
51+
// - error if acquisition timed out or failed
52+
//
53+
// Thread-safe: Called by the credentials provider's notification goroutine.
54+
func (c *ConnReAuthCredentialsListener) OnNext(credentials auth.Credentials) {
55+
if c.conn == nil || c.conn.IsClosed() || c.manager == nil || c.reAuth == nil {
56+
return
57+
}
58+
59+
// Always use async reauth to avoid complex pool semaphore issues
60+
// The synchronous path can cause deadlocks in the pool's semaphore mechanism
61+
// when called from the Subscribe goroutine, especially with small pool sizes.
62+
// The connection pool hook will re-authenticate the connection when it is
63+
// returned to the pool in a clean, idle state.
64+
c.manager.MarkForReAuth(c.conn, func(err error) {
65+
// err is from connection acquisition (timeout, etc.)
66+
if err != nil {
67+
// Log the error
68+
c.OnError(err)
69+
return
70+
}
71+
// err is from reauth command execution
72+
err = c.reAuth(c.conn, credentials)
73+
if err != nil {
74+
// Log the error
75+
c.OnError(err)
76+
return
77+
}
78+
})
79+
}
80+
81+
// OnError is called when an error occurs during credential streaming or re-authentication.
82+
//
83+
// This method can be called from:
84+
// - The StreamingCredentialsProvider when there's an error in the credentials stream
85+
// - The re-auth process when connection acquisition times out
86+
// - The re-auth process when the AUTH command fails
87+
//
88+
// The error is delegated to the onErr callback provided during listener creation.
89+
//
90+
// Thread-safe: Can be called from multiple goroutines (provider, re-auth worker).
91+
func (c *ConnReAuthCredentialsListener) OnError(err error) {
92+
if c.onErr == nil {
93+
return
94+
}
95+
96+
c.onErr(c.conn, err)
97+
}
98+
99+
// Ensure ConnReAuthCredentialsListener implements the CredentialsListener interface.
100+
var _ auth.CredentialsListener = (*ConnReAuthCredentialsListener)(nil)
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package streaming
2+
3+
import (
4+
"sync"
5+
6+
"github.com/redis/go-redis/v9/auth"
7+
)
8+
9+
// CredentialsListeners is a thread-safe collection of credentials listeners
10+
// indexed by connection ID.
11+
//
12+
// This collection is used by the Manager to maintain a registry of listeners
13+
// for each connection in the pool. Listeners are reused when connections are
14+
// reinitialized (e.g., after a handoff) to avoid creating duplicate subscriptions
15+
// to the StreamingCredentialsProvider.
16+
//
17+
// The collection supports concurrent access from multiple goroutines during
18+
// connection initialization, credential updates, and connection removal.
19+
type CredentialsListeners struct {
20+
// listeners maps connection ID to credentials listener
21+
listeners map[uint64]auth.CredentialsListener
22+
23+
// lock protects concurrent access to the listeners map
24+
lock sync.RWMutex
25+
}
26+
27+
// NewCredentialsListeners creates a new thread-safe credentials listeners collection.
28+
func NewCredentialsListeners() *CredentialsListeners {
29+
return &CredentialsListeners{
30+
listeners: make(map[uint64]auth.CredentialsListener),
31+
}
32+
}
33+
34+
// Add adds or updates a credentials listener for a connection.
35+
//
36+
// If a listener already exists for the connection ID, it is replaced.
37+
// This is safe because the old listener should have been unsubscribed
38+
// before the connection was reinitialized.
39+
//
40+
// Thread-safe: Can be called concurrently from multiple goroutines.
41+
func (c *CredentialsListeners) Add(connID uint64, listener auth.CredentialsListener) {
42+
c.lock.Lock()
43+
defer c.lock.Unlock()
44+
if c.listeners == nil {
45+
c.listeners = make(map[uint64]auth.CredentialsListener)
46+
}
47+
c.listeners[connID] = listener
48+
}
49+
50+
// Get retrieves the credentials listener for a connection.
51+
//
52+
// Returns:
53+
// - listener: The credentials listener for the connection, or nil if not found
54+
// - ok: true if a listener exists for the connection ID, false otherwise
55+
//
56+
// Thread-safe: Can be called concurrently from multiple goroutines.
57+
func (c *CredentialsListeners) Get(connID uint64) (auth.CredentialsListener, bool) {
58+
c.lock.RLock()
59+
defer c.lock.RUnlock()
60+
if len(c.listeners) == 0 {
61+
return nil, false
62+
}
63+
listener, ok := c.listeners[connID]
64+
return listener, ok
65+
}
66+
67+
// Remove removes the credentials listener for a connection.
68+
//
69+
// This is called when a connection is removed from the pool to prevent
70+
// memory leaks. If no listener exists for the connection ID, this is a no-op.
71+
//
72+
// Thread-safe: Can be called concurrently from multiple goroutines.
73+
func (c *CredentialsListeners) Remove(connID uint64) {
74+
c.lock.Lock()
75+
defer c.lock.Unlock()
76+
delete(c.listeners, connID)
77+
}

internal/auth/streaming/manager.go

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package streaming
2+
3+
import (
4+
"errors"
5+
"time"
6+
7+
"github.com/redis/go-redis/v9/auth"
8+
"github.com/redis/go-redis/v9/internal/pool"
9+
)
10+
11+
// Manager coordinates streaming credentials and re-authentication for a connection pool.
12+
//
13+
// The manager is responsible for:
14+
// - Creating and managing per-connection credentials listeners
15+
// - Providing the pool hook for re-authentication
16+
// - Coordinating between credentials updates and pool operations
17+
//
18+
// When credentials change via a StreamingCredentialsProvider:
19+
// 1. The credentials listener (ConnReAuthCredentialsListener) receives the update
20+
// 2. It calls MarkForReAuth on the manager
21+
// 3. The manager delegates to the pool hook
22+
// 4. The pool hook schedules background re-authentication
23+
//
24+
// The manager maintains a registry of credentials listeners indexed by connection ID,
25+
// allowing listener reuse when connections are reinitialized (e.g., after handoff).
26+
type Manager struct {
27+
// credentialsListeners maps connection ID to credentials listener
28+
credentialsListeners *CredentialsListeners
29+
30+
// pool is the connection pool being managed
31+
pool pool.Pooler
32+
33+
// poolHookRef is the re-authentication pool hook
34+
poolHookRef *ReAuthPoolHook
35+
}
36+
37+
// NewManager creates a new streaming credentials manager.
38+
//
39+
// Parameters:
40+
// - pl: The connection pool to manage
41+
// - reAuthTimeout: Maximum time to wait for acquiring a connection for re-authentication
42+
//
43+
// The manager creates a ReAuthPoolHook sized to match the pool size, ensuring that
44+
// re-auth operations don't exhaust the connection pool.
45+
func NewManager(pl pool.Pooler, reAuthTimeout time.Duration) *Manager {
46+
m := &Manager{
47+
pool: pl,
48+
poolHookRef: NewReAuthPoolHook(pl.Size(), reAuthTimeout),
49+
credentialsListeners: NewCredentialsListeners(),
50+
}
51+
m.poolHookRef.manager = m
52+
return m
53+
}
54+
55+
// PoolHook returns the pool hook for re-authentication.
56+
//
57+
// This hook should be registered with the connection pool to enable
58+
// automatic re-authentication when credentials change.
59+
func (m *Manager) PoolHook() pool.PoolHook {
60+
return m.poolHookRef
61+
}
62+
63+
// Listener returns or creates a credentials listener for a connection.
64+
//
65+
// This method is called during connection initialization to set up the
66+
// credentials listener. If a listener already exists for the connection ID
67+
// (e.g., after a handoff), it is reused.
68+
//
69+
// Parameters:
70+
// - poolCn: The connection to create/get a listener for
71+
// - reAuth: Function to re-authenticate the connection with new credentials
72+
// - onErr: Function to call when re-authentication fails
73+
//
74+
// Returns:
75+
// - auth.CredentialsListener: The listener to subscribe to the credentials provider
76+
// - error: Non-nil if poolCn is nil
77+
//
78+
// Note: The reAuth and onErr callbacks are captured once when the listener is
79+
// created and reused for the connection's lifetime. They should not change.
80+
//
81+
// Thread-safe: Can be called concurrently during connection initialization.
82+
func (m *Manager) Listener(
83+
poolCn *pool.Conn,
84+
reAuth func(*pool.Conn, auth.Credentials) error,
85+
onErr func(*pool.Conn, error),
86+
) (auth.CredentialsListener, error) {
87+
if poolCn == nil {
88+
return nil, errors.New("poolCn cannot be nil")
89+
}
90+
connID := poolCn.GetID()
91+
// if we reconnect the underlying network connection, the streaming credentials listener will continue to work
92+
// so we can get the old listener from the cache and use it.
93+
// subscribing the same (an already subscribed) listener for a StreamingCredentialsProvider SHOULD be a no-op
94+
listener, ok := m.credentialsListeners.Get(connID)
95+
if !ok || listener == nil {
96+
// Create new listener for this connection
97+
// Note: Callbacks (reAuth, onErr) are captured once and reused for the connection's lifetime
98+
newCredListener := &ConnReAuthCredentialsListener{
99+
conn: poolCn,
100+
reAuth: reAuth,
101+
onErr: onErr,
102+
manager: m,
103+
}
104+
105+
m.credentialsListeners.Add(connID, newCredListener)
106+
listener = newCredListener
107+
}
108+
return listener, nil
109+
}
110+
111+
// MarkForReAuth marks a connection for re-authentication.
112+
//
113+
// This method is called by the credentials listener when new credentials are
114+
// received. It delegates to the pool hook to schedule background re-authentication.
115+
//
116+
// Parameters:
117+
// - poolCn: The connection to re-authenticate
118+
// - reAuthFn: Function to call for re-authentication, receives error if acquisition fails
119+
//
120+
// Thread-safe: Called by credentials listeners when credentials change.
121+
func (m *Manager) MarkForReAuth(poolCn *pool.Conn, reAuthFn func(error)) {
122+
connID := poolCn.GetID()
123+
m.poolHookRef.MarkForReAuth(connID, reAuthFn)
124+
}
125+
126+
// RemoveListener removes the credentials listener for a connection.
127+
//
128+
// This method is called by the pool hook's OnRemove to clean up listeners
129+
// when connections are removed from the pool.
130+
//
131+
// Parameters:
132+
// - connID: The connection ID whose listener should be removed
133+
//
134+
// Thread-safe: Called during connection removal.
135+
func (m *Manager) RemoveListener(connID uint64) {
136+
m.credentialsListeners.Remove(connID)
137+
}

0 commit comments

Comments
 (0)