Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
K
k3s
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Jacklull
k3s
Commits
d694dd1d
Commit
d694dd1d
authored
Mar 04, 2025
by
Brad Davidson
Committed by
Brad Davidson
Mar 05, 2025
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add periodic background snapshot reconcile
Interval is configurable with the new etcd-snapshot-reconcile-interval flag. Signed-off-by:
Brad Davidson
<
brad.davidson@rancher.com
>
parent
bed1f668
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
45 additions
and
21 deletions
+45
-21
server.go
pkg/cli/cmds/server.go
+7
-0
server.go
pkg/cli/server/server.go
+7
-0
cluster.go
pkg/cluster/cluster.go
+25
-18
managed.go
pkg/cluster/managed.go
+3
-3
types.go
pkg/daemons/config/types.go
+1
-0
etcd_linux_test.go
pkg/etcd/etcd_linux_test.go
+2
-0
No files found.
pkg/cli/cmds/server.go
View file @
d694dd1d
...
...
@@ -92,6 +92,7 @@ type Server struct {
EtcdExposeMetrics
bool
EtcdSnapshotDir
string
EtcdSnapshotCron
string
EtcdSnapshotReconcile
time
.
Duration
EtcdSnapshotRetention
int
EtcdSnapshotCompress
bool
EtcdListFormat
string
...
...
@@ -390,6 +391,12 @@ var ServerFlags = []cli.Flag{
Destination
:
&
ServerConfig
.
EtcdSnapshotCron
,
Value
:
"0 */12 * * *"
,
},
&
cli
.
DurationFlag
{
Name
:
"etcd-snapshot-reconcile-interval"
,
Usage
:
"(db) Snapshot reconcile interval"
,
Destination
:
&
ServerConfig
.
EtcdSnapshotReconcile
,
Value
:
10
*
time
.
Minute
,
},
&
cli
.
IntFlag
{
Name
:
"etcd-snapshot-retention"
,
Usage
:
"(db) Number of snapshots to retain"
,
...
...
pkg/cli/server/server.go
View file @
d694dd1d
...
...
@@ -184,12 +184,19 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
serverConfig
.
ControlConfig
.
VModule
=
cmds
.
LogConfig
.
VModule
if
!
cfg
.
EtcdDisableSnapshots
||
cfg
.
ClusterReset
{
if
cfg
.
EtcdSnapshotReconcile
<=
0
{
return
errors
.
New
(
"etcd-snapshot-reconcile-interval must be greater than 0s"
)
}
serverConfig
.
ControlConfig
.
EtcdSnapshotCompress
=
cfg
.
EtcdSnapshotCompress
serverConfig
.
ControlConfig
.
EtcdSnapshotName
=
cfg
.
EtcdSnapshotName
serverConfig
.
ControlConfig
.
EtcdSnapshotCron
=
cfg
.
EtcdSnapshotCron
serverConfig
.
ControlConfig
.
EtcdSnapshotDir
=
cfg
.
EtcdSnapshotDir
serverConfig
.
ControlConfig
.
EtcdSnapshotReconcile
=
metav1
.
Duration
{
Duration
:
cfg
.
EtcdSnapshotReconcile
}
serverConfig
.
ControlConfig
.
EtcdSnapshotRetention
=
cfg
.
EtcdSnapshotRetention
if
cfg
.
EtcdS3
{
if
cfg
.
EtcdS3Timeout
<=
0
{
return
errors
.
New
(
"etcd-s3-timeout must be greater than 0s"
)
}
serverConfig
.
ControlConfig
.
EtcdS3
=
&
config
.
EtcdS3
{
AccessKey
:
cfg
.
EtcdS3AccessKey
,
Bucket
:
cfg
.
EtcdS3BucketName
,
...
...
pkg/cluster/cluster.go
View file @
d694dd1d
...
...
@@ -3,7 +3,6 @@ package cluster
import
(
"context"
"net/url"
"runtime"
"strings"
"time"
...
...
@@ -44,53 +43,61 @@ func (c *Cluster) Start(ctx context.Context) (<-chan struct{}, error) {
return
ready
,
nil
}
// start managed
database (if necessary)
// start managed
etcd database; when kine is in use this is a no-op.
if
err
:=
c
.
start
(
ctx
);
err
!=
nil
{
return
nil
,
pkgerrors
.
WithMessage
(
err
,
"start managed database"
)
}
// get the wait channel for testing managed database readiness
ready
,
err
:=
c
.
testClusterDB
(
ctx
)
if
err
!=
nil
{
return
nil
,
err
}
// get the wait channel for testing etcd server readiness; when kine is in
// use the channel is closed immediately.
ready
:=
c
.
testClusterDB
(
ctx
)
// set c.config.Datastore and c.config.Runtime.EtcdConfig with values
// necessary to build etcd clients, and start kine listener if necessary.
if
err
:=
c
.
startStorage
(
ctx
,
false
);
err
!=
nil
{
return
nil
,
err
}
// if necessary, store bootstrap data to datastore
// if necessary, store bootstrap data to datastore. saveBootstrap is only set
// when using kine, so this can be done before the ready channel has been closed.
if
c
.
saveBootstrap
{
if
err
:=
Save
(
ctx
,
c
.
config
,
false
);
err
!=
nil
{
return
nil
,
err
}
}
// at this point, if etcd is in use, its bootstrapping is complete,
// so save the bootstrap data. Etcd needs to be up for this. If
// the save call returns an error, we panic since subsequent etcd
// snapshots will be empty.
if
c
.
managedDB
!=
nil
{
go
func
()
{
for
{
select
{
case
<-
ready
:
// always save to managed etcd, to ensure that any files modified locally are in sync with the datastore.
// this will panic if multiple keys exist, to prevent nodes from running with different bootstrap data.
if
err
:=
Save
(
ctx
,
c
.
config
,
false
);
err
!=
nil
{
panic
(
err
)
}
if
!
c
.
config
.
EtcdDisableSnapshots
{
_
=
wait
.
PollUntilContextCancel
(
ctx
,
time
.
Second
,
true
,
func
(
ctx
context
.
Context
)
(
bool
,
error
)
{
err
:=
c
.
managedDB
.
ReconcileSnapshotData
(
ctx
)
if
err
!=
nil
{
// do an initial reconcile of snapshots with a fast retry until it succeeds
wait
.
PollUntilContextCancel
(
ctx
,
time
.
Second
,
true
,
func
(
ctx
context
.
Context
)
(
bool
,
error
)
{
if
err
:=
c
.
managedDB
.
ReconcileSnapshotData
(
ctx
);
err
!=
nil
{
logrus
.
Errorf
(
"Failed to record snapshots for cluster: %v"
,
err
)
return
false
,
nil
}
return
err
==
nil
,
nil
return
true
,
nil
})
// continue reconciling snapshots in the background at the configured interval.
// the interval is jittered by 5% to avoid all nodes reconciling at the same time.
wait
.
JitterUntilWithContext
(
ctx
,
func
(
ctx
context
.
Context
)
{
if
err
:=
c
.
managedDB
.
ReconcileSnapshotData
(
ctx
);
err
!=
nil
{
logrus
.
Errorf
(
"Failed to record snapshots for cluster: %v"
,
err
)
}
},
c
.
config
.
EtcdSnapshotReconcile
.
Duration
,
0.05
,
false
)
}
return
case
<-
ctx
.
Done
()
:
return
default
:
runtime
.
Gosched
()
}
}
}()
...
...
pkg/cluster/managed.go
View file @
d694dd1d
...
...
@@ -25,11 +25,11 @@ import (
// testClusterDB returns a channel that will be closed when the datastore connection is available.
// The datastore is tested for readiness every 5 seconds until the test succeeds.
func
(
c
*
Cluster
)
testClusterDB
(
ctx
context
.
Context
)
(
<-
chan
struct
{},
error
)
{
func
(
c
*
Cluster
)
testClusterDB
(
ctx
context
.
Context
)
<-
chan
struct
{}
{
result
:=
make
(
chan
struct
{})
if
c
.
managedDB
==
nil
{
close
(
result
)
return
result
,
nil
return
result
}
go
func
()
{
...
...
@@ -50,7 +50,7 @@ func (c *Cluster) testClusterDB(ctx context.Context) (<-chan struct{}, error) {
}
}()
return
result
,
nil
return
result
}
// start starts the database, unless a cluster reset has been requested, in which case
...
...
pkg/daemons/config/types.go
View file @
d694dd1d
...
...
@@ -242,6 +242,7 @@ type Control struct {
EtcdExposeMetrics
bool
`json:"-"`
EtcdSnapshotDir
string
`json:"-"`
EtcdSnapshotCron
string
`json:"-"`
EtcdSnapshotReconcile
metav1
.
Duration
`json:"-"`
EtcdSnapshotRetention
int
`json:"-"`
EtcdSnapshotCompress
bool
`json:"-"`
EtcdListFormat
string
`json:"-"`
...
...
pkg/etcd/etcd_linux_test.go
View file @
d694dd1d
...
...
@@ -31,6 +31,7 @@ import (
healthpb
"google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/reflection"
"google.golang.org/grpc/status"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
utilnet
"k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/wait"
)
...
...
@@ -67,6 +68,7 @@ func generateTestConfig() *config.Control {
DataDir
:
"/tmp/k3s/"
,
// Different than the default value
EtcdSnapshotName
:
"etcd-snapshot"
,
EtcdSnapshotCron
:
"0 */12 * * *"
,
EtcdSnapshotReconcile
:
metav1
.
Duration
{
Duration
:
10
*
time
.
Minute
},
EtcdSnapshotRetention
:
5
,
EtcdS3
:
&
config
.
EtcdS3
{
Endpoint
:
"s3.amazonaws.com"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment