mirror of
https://github.com/GlueOps/autoglue.git
synced 2026-02-13 04:40:05 +01:00
fix: improve job tracking
Signed-off-by: allanice001 <allanice001@gmail.com>
This commit is contained in:
@@ -36,6 +36,21 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
var args ClusterActionArgs
|
||||
_ = j.ParseArguments(&args)
|
||||
|
||||
runID, _ := uuid.Parse(j.ID)
|
||||
|
||||
updateRun := func(status string, errMsg string) {
|
||||
updates := map[string]any{
|
||||
"status": status,
|
||||
"error": errMsg,
|
||||
}
|
||||
if status == "succeeded" || status == "failed" {
|
||||
updates["finised_at"] = time.Now().UTC()
|
||||
}
|
||||
db.Model(&models.ClusterRun{}).Where("id = ?", runID).Updates(updates)
|
||||
}
|
||||
|
||||
updateRun("running", "")
|
||||
|
||||
logger := log.With().
|
||||
Str("job", j.ID).
|
||||
Str("cluster_id", args.ClusterID.String()).
|
||||
@@ -56,18 +71,20 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
Preload("NodePools.Servers.SshKey").
|
||||
Where("id = ? AND organization_id = ?", args.ClusterID, args.OrgID).
|
||||
First(&c).Error; err != nil {
|
||||
|
||||
updateRun("failed", fmt.Errorf("load cluster: %w", err).Error())
|
||||
return nil, fmt.Errorf("load cluster: %w", err)
|
||||
}
|
||||
|
||||
// ---- Step 1: Prepare (mostly lifted from ClusterPrepareWorker)
|
||||
if err := setClusterStatus(db, c.ID, clusterStatusBootstrapping, ""); err != nil {
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("mark bootstrapping: %w", err)
|
||||
}
|
||||
c.Status = clusterStatusBootstrapping
|
||||
|
||||
if err := validateClusterForPrepare(&c); err != nil {
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("validate: %w", err)
|
||||
}
|
||||
|
||||
@@ -75,6 +92,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
keyPayloads, sshConfig, err := buildSSHAssetsForCluster(db, &c, allServers)
|
||||
if err != nil {
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("build ssh assets: %w", err)
|
||||
}
|
||||
|
||||
@@ -98,6 +116,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
orgKey, orgSecret, err := findOrCreateClusterAutomationKey(db, c.OrganizationID, c.ID, 24*time.Hour)
|
||||
if err != nil {
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("org key: %w", err)
|
||||
}
|
||||
dtoCluster.OrgKey = &orgKey
|
||||
@@ -106,6 +125,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
payloadJSON, err := json.MarshalIndent(dtoCluster, "", " ")
|
||||
if err != nil {
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("marshal payload: %w", err)
|
||||
}
|
||||
|
||||
@@ -115,11 +135,13 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
cancel()
|
||||
if err != nil {
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("push assets: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := setClusterStatus(db, c.ID, clusterStatusPending, ""); err != nil {
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("mark pending: %w", err)
|
||||
}
|
||||
c.Status = clusterStatusPending
|
||||
@@ -132,11 +154,13 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Str("output", out).Msg("ping-servers failed")
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make ping-servers: %v", err))
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("ping-servers: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := setClusterStatus(db, c.ID, clusterStatusProvisioning, ""); err != nil {
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("mark provisioning: %w", err)
|
||||
}
|
||||
c.Status = clusterStatusProvisioning
|
||||
@@ -149,13 +173,18 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Str("output", out).Msg("bootstrap target failed")
|
||||
_ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make %s: %v", args.MakeTarget, err))
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("make %s: %w", args.MakeTarget, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := setClusterStatus(db, c.ID, clusterStatusReady, ""); err != nil {
|
||||
updateRun("failed", err.Error())
|
||||
return nil, fmt.Errorf("mark ready: %w", err)
|
||||
}
|
||||
|
||||
updateRun("succeeded", "")
|
||||
|
||||
return ClusterActionResult{
|
||||
Status: "ok",
|
||||
Action: args.Action,
|
||||
|
||||
Reference in New Issue
Block a user