Compare commits

...

3 Commits

Author SHA1 Message Date
allanice001
2057f92b82 fix: improve job tracking 2025-12-26 15:04:31 +00:00
allanice001
169283b6c7 fix: improve job tracking
Signed-off-by: allanice001 <allanice001@gmail.com>
2025-12-26 15:04:15 +00:00
allanice001
865270312c fix: update jobs
Signed-off-by: allanice001 <allanice001@gmail.com>
2025-12-26 04:47:08 +00:00
3 changed files with 39 additions and 5 deletions

View File

@@ -36,6 +36,21 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
var args ClusterActionArgs var args ClusterActionArgs
_ = j.ParseArguments(&args) _ = j.ParseArguments(&args)
runID, _ := uuid.Parse(j.ID)
// updateRun records the progress of this job on its ClusterRun row,
// matched by runID. It always writes "status" and "error"; when the
// status is terminal ("succeeded" or "failed") it also stamps
// "finished_at" with the current UTC time.
updateRun := func(status string, errMsg string) {
	updates := map[string]any{
		"status": status,
		"error":  errMsg,
	}
	if status == "succeeded" || status == "failed" {
		// The key must match the column name exactly; a misspelled key
		// leaves finished_at unset.
		updates["finished_at"] = time.Now().UTC()
	}
	// NOTE(review): the result of this update is not checked, so a failed
	// write goes unnoticed — consider logging it.
	db.Model(&models.ClusterRun{}).Where("id = ?", runID).Updates(updates)
}
updateRun("running", "")
logger := log.With(). logger := log.With().
Str("job", j.ID). Str("job", j.ID).
Str("cluster_id", args.ClusterID.String()). Str("cluster_id", args.ClusterID.String()).
@@ -56,18 +71,20 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
Preload("NodePools.Servers.SshKey"). Preload("NodePools.Servers.SshKey").
Where("id = ? AND organization_id = ?", args.ClusterID, args.OrgID). Where("id = ? AND organization_id = ?", args.ClusterID, args.OrgID).
First(&c).Error; err != nil { First(&c).Error; err != nil {
updateRun("failed", fmt.Errorf("load cluster: %w", err).Error())
return nil, fmt.Errorf("load cluster: %w", err) return nil, fmt.Errorf("load cluster: %w", err)
} }
// ---- Step 1: Prepare (mostly lifted from ClusterPrepareWorker) // ---- Step 1: Prepare (mostly lifted from ClusterPrepareWorker)
if err := setClusterStatus(db, c.ID, clusterStatusBootstrapping, ""); err != nil { if err := setClusterStatus(db, c.ID, clusterStatusBootstrapping, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark bootstrapping: %w", err) return nil, fmt.Errorf("mark bootstrapping: %w", err)
} }
c.Status = clusterStatusBootstrapping c.Status = clusterStatusBootstrapping
if err := validateClusterForPrepare(&c); err != nil { if err := validateClusterForPrepare(&c); err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error()) _ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("validate: %w", err) return nil, fmt.Errorf("validate: %w", err)
} }
@@ -75,6 +92,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
keyPayloads, sshConfig, err := buildSSHAssetsForCluster(db, &c, allServers) keyPayloads, sshConfig, err := buildSSHAssetsForCluster(db, &c, allServers)
if err != nil { if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error()) _ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("build ssh assets: %w", err) return nil, fmt.Errorf("build ssh assets: %w", err)
} }
@@ -98,6 +116,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
orgKey, orgSecret, err := findOrCreateClusterAutomationKey(db, c.OrganizationID, c.ID, 24*time.Hour) orgKey, orgSecret, err := findOrCreateClusterAutomationKey(db, c.OrganizationID, c.ID, 24*time.Hour)
if err != nil { if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error()) _ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("org key: %w", err) return nil, fmt.Errorf("org key: %w", err)
} }
dtoCluster.OrgKey = &orgKey dtoCluster.OrgKey = &orgKey
@@ -106,6 +125,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
payloadJSON, err := json.MarshalIndent(dtoCluster, "", " ") payloadJSON, err := json.MarshalIndent(dtoCluster, "", " ")
if err != nil { if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error()) _ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("marshal payload: %w", err) return nil, fmt.Errorf("marshal payload: %w", err)
} }
@@ -115,11 +135,13 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
cancel() cancel()
if err != nil { if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error()) _ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("push assets: %w", err) return nil, fmt.Errorf("push assets: %w", err)
} }
} }
if err := setClusterStatus(db, c.ID, clusterStatusPending, ""); err != nil { if err := setClusterStatus(db, c.ID, clusterStatusPending, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark pending: %w", err) return nil, fmt.Errorf("mark pending: %w", err)
} }
c.Status = clusterStatusPending c.Status = clusterStatusPending
@@ -132,11 +154,13 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
if err != nil { if err != nil {
logger.Error().Err(err).Str("output", out).Msg("ping-servers failed") logger.Error().Err(err).Str("output", out).Msg("ping-servers failed")
_ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make ping-servers: %v", err)) _ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make ping-servers: %v", err))
updateRun("failed", err.Error())
return nil, fmt.Errorf("ping-servers: %w", err) return nil, fmt.Errorf("ping-servers: %w", err)
} }
} }
if err := setClusterStatus(db, c.ID, clusterStatusProvisioning, ""); err != nil { if err := setClusterStatus(db, c.ID, clusterStatusProvisioning, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark provisioning: %w", err) return nil, fmt.Errorf("mark provisioning: %w", err)
} }
c.Status = clusterStatusProvisioning c.Status = clusterStatusProvisioning
@@ -149,13 +173,18 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
if err != nil { if err != nil {
logger.Error().Err(err).Str("output", out).Msg("bootstrap target failed") logger.Error().Err(err).Str("output", out).Msg("bootstrap target failed")
_ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make %s: %v", args.MakeTarget, err)) _ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make %s: %v", args.MakeTarget, err))
updateRun("failed", err.Error())
return nil, fmt.Errorf("make %s: %w", args.MakeTarget, err) return nil, fmt.Errorf("make %s: %w", args.MakeTarget, err)
} }
} }
if err := setClusterStatus(db, c.ID, clusterStatusReady, ""); err != nil { if err := setClusterStatus(db, c.ID, clusterStatusReady, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark ready: %w", err) return nil, fmt.Errorf("mark ready: %w", err)
} }
updateRun("succeeded", "")
return ClusterActionResult{ return ClusterActionResult{
Status: "ok", Status: "ok",
Action: args.Action, Action: args.Action,

View File

@@ -211,12 +211,18 @@ func RunClusterAction(db *gorm.DB, jobs *bg.Jobs) http.HandlerFunc {
return return
} }
args := bg.ClusterActionArgs{
OrgID: orgID,
ClusterID: clusterID,
Action: action.MakeTarget,
MakeTarget: action.MakeTarget,
}
// Enqueue with run.ID as the job ID so the worker can look it up. // Enqueue with run.ID as the job ID so the worker can look it up.
_, enqueueErr := jobs.Enqueue( _, enqueueErr := jobs.Enqueue(
r.Context(), r.Context(),
run.ID.String(), run.ID.String(),
"cluster_action", "cluster_action",
bg.ClusterActionWorker, args,
archer.WithMaxRetries(3), archer.WithMaxRetries(3),
) )
@@ -225,7 +231,7 @@ func RunClusterAction(db *gorm.DB, jobs *bg.Jobs) http.HandlerFunc {
Where("id = ?", run.ID). Where("id = ?", run.ID).
Updates(map[string]any{ Updates(map[string]any{
"status": models.ClusterRunStatusFailed, "status": models.ClusterRunStatusFailed,
"error": "failed to enqueue job", "error": "failed to enqueue job: " + enqueueErr.Error(),
"finished_at": time.Now().UTC(), "finished_at": time.Now().UTC(),
}).Error }).Error

View File

@@ -946,13 +946,12 @@ export const ClustersPage = () => {
{/* Configure dialog (attachments + kubeconfig + node pools + actions/runs) */} {/* Configure dialog (attachments + kubeconfig + node pools + actions/runs) */}
<Dialog open={!!configCluster} onOpenChange={(open) => !open && setConfigCluster(null)}> <Dialog open={!!configCluster} onOpenChange={(open) => !open && setConfigCluster(null)}>
<DialogContent className="max-h-[90vh] w-full max-w-3xl overflow-y-auto"> <DialogContent className="max-h-[90vh] overflow-y-auto sm:max-w-2xl lg:max-w-250 ">
<DialogHeader> <DialogHeader>
<DialogTitle> <DialogTitle>
Configure Cluster{configCluster?.name ? `: ${configCluster.name}` : ""} Configure Cluster{configCluster?.name ? `: ${configCluster.name}` : ""}
</DialogTitle> </DialogTitle>
</DialogHeader> </DialogHeader>
{configCluster && ( {configCluster && (
<div className="space-y-6 py-2"> <div className="space-y-6 py-2">
{/* Cluster Actions */} {/* Cluster Actions */}