fix: improve job tracking

Signed-off-by: allanice001 <allanice001@gmail.com>
This commit is contained in:
allanice001
2025-12-26 15:04:15 +00:00
parent 865270312c
commit 169283b6c7
2 changed files with 77 additions and 31 deletions

View File

@@ -36,6 +36,21 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
var args ClusterActionArgs
_ = j.ParseArguments(&args)
runID, _ := uuid.Parse(j.ID)
updateRun := func(status string, errMsg string) {
updates := map[string]any{
"status": status,
"error": errMsg,
}
if status == "succeeded" || status == "failed" {
updates["finised_at"] = time.Now().UTC()
}
db.Model(&models.ClusterRun{}).Where("id = ?", runID).Updates(updates)
}
updateRun("running", "")
logger := log.With().
Str("job", j.ID).
Str("cluster_id", args.ClusterID.String()).
@@ -56,18 +71,20 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
Preload("NodePools.Servers.SshKey").
Where("id = ? AND organization_id = ?", args.ClusterID, args.OrgID).
First(&c).Error; err != nil {
updateRun("failed", fmt.Errorf("load cluster: %w", err).Error())
return nil, fmt.Errorf("load cluster: %w", err)
}
// ---- Step 1: Prepare (mostly lifted from ClusterPrepareWorker)
if err := setClusterStatus(db, c.ID, clusterStatusBootstrapping, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark bootstrapping: %w", err)
}
c.Status = clusterStatusBootstrapping
if err := validateClusterForPrepare(&c); err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("validate: %w", err)
}
@@ -75,6 +92,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
keyPayloads, sshConfig, err := buildSSHAssetsForCluster(db, &c, allServers)
if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("build ssh assets: %w", err)
}
@@ -98,6 +116,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
orgKey, orgSecret, err := findOrCreateClusterAutomationKey(db, c.OrganizationID, c.ID, 24*time.Hour)
if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("org key: %w", err)
}
dtoCluster.OrgKey = &orgKey
@@ -106,6 +125,7 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
payloadJSON, err := json.MarshalIndent(dtoCluster, "", " ")
if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("marshal payload: %w", err)
}
@@ -115,11 +135,13 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
cancel()
if err != nil {
_ = setClusterStatus(db, c.ID, clusterStatusFailed, err.Error())
updateRun("failed", err.Error())
return nil, fmt.Errorf("push assets: %w", err)
}
}
if err := setClusterStatus(db, c.ID, clusterStatusPending, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark pending: %w", err)
}
c.Status = clusterStatusPending
@@ -132,11 +154,13 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
if err != nil {
logger.Error().Err(err).Str("output", out).Msg("ping-servers failed")
_ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make ping-servers: %v", err))
updateRun("failed", err.Error())
return nil, fmt.Errorf("ping-servers: %w", err)
}
}
if err := setClusterStatus(db, c.ID, clusterStatusProvisioning, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark provisioning: %w", err)
}
c.Status = clusterStatusProvisioning
@@ -149,13 +173,18 @@ func ClusterActionWorker(db *gorm.DB) archer.WorkerFn {
if err != nil {
logger.Error().Err(err).Str("output", out).Msg("bootstrap target failed")
_ = setClusterStatus(db, c.ID, clusterStatusFailed, fmt.Sprintf("make %s: %v", args.MakeTarget, err))
updateRun("failed", err.Error())
return nil, fmt.Errorf("make %s: %w", args.MakeTarget, err)
}
}
if err := setClusterStatus(db, c.ID, clusterStatusReady, ""); err != nil {
updateRun("failed", err.Error())
return nil, fmt.Errorf("mark ready: %w", err)
}
updateRun("succeeded", "")
return ClusterActionResult{
Status: "ok",
Action: args.Action,

View File

@@ -1,33 +1,51 @@
import { useEffect, useMemo, useState } from "react";
import { actionsApi } from "@/api/actions";
import { clustersApi } from "@/api/clusters";
import { dnsApi } from "@/api/dns";
import { loadBalancersApi } from "@/api/loadbalancers";
import { nodePoolsApi } from "@/api/node_pools";
import { serversApi } from "@/api/servers";
import type { DtoActionResponse, DtoClusterResponse, DtoClusterRunResponse, DtoDomainResponse, DtoLoadBalancerResponse, DtoNodePoolResponse, DtoRecordSetResponse, DtoServerResponse } from "@/sdk";
import { zodResolver } from "@hookform/resolvers/zod";
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
import { AlertCircle, CheckCircle2, CircleSlash2, FileCode2, Globe2, Loader2, MapPin, Pencil, Plus, Search, Server, Wrench } from "lucide-react";
import { useForm } from "react-hook-form";
import { toast } from "sonner";
import { z } from "zod";
import { truncateMiddle } from "@/lib/utils";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle, DialogTrigger } from "@/components/ui/dialog";
import { Form, FormControl, FormField, FormItem, FormLabel, FormMessage } from "@/components/ui/form";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table";
import { Textarea } from "@/components/ui/textarea";
import { useEffect, useMemo, useState } from "react"
import { actionsApi } from "@/api/actions"
import { clustersApi } from "@/api/clusters"
import { dnsApi } from "@/api/dns"
import { loadBalancersApi } from "@/api/loadbalancers"
import { nodePoolsApi } from "@/api/node_pools"
import { serversApi } from "@/api/servers"
import type {
DtoActionResponse,
DtoClusterResponse,
DtoClusterRunResponse,
DtoDomainResponse,
DtoLoadBalancerResponse,
DtoNodePoolResponse,
DtoRecordSetResponse,
DtoServerResponse
} from "@/sdk"
import { zodResolver } from "@hookform/resolvers/zod"
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"
import {
AlertCircle,
CheckCircle2,
CircleSlash2,
FileCode2,
Globe2,
Loader2,
MapPin,
Pencil,
Plus,
Search,
Server,
Wrench
} from "lucide-react"
import { useForm } from "react-hook-form"
import { toast } from "sonner"
import { z } from "zod"
import { truncateMiddle } from "@/lib/utils"
import { Badge } from "@/components/ui/badge"
import { Button } from "@/components/ui/button"
import { Dialog, DialogContent, DialogFooter, DialogHeader, DialogTitle, DialogTrigger } from "@/components/ui/dialog"
import { Form, FormControl, FormField, FormItem, FormLabel, FormMessage } from "@/components/ui/form"
import { Input } from "@/components/ui/input"
import { Label } from "@/components/ui/label"
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select"
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
import { Textarea } from "@/components/ui/textarea"
// --- Schemas ---
@@ -946,13 +964,12 @@ export const ClustersPage = () => {
{/* Configure dialog (attachments + kubeconfig + node pools + actions/runs) */}
<Dialog open={!!configCluster} onOpenChange={(open) => !open && setConfigCluster(null)}>
<DialogContent className="max-h-[90vh] w-full max-w-3xl overflow-y-auto">
<DialogContent className="max-h-[90vh] overflow-y-auto sm:max-w-2xl lg:max-w-250 ">
<DialogHeader>
<DialogTitle>
Configure Cluster{configCluster?.name ? `: ${configCluster.name}` : ""}
</DialogTitle>
</DialogHeader>
{configCluster && (
<div className="space-y-6 py-2">
{/* Cluster Actions */}