mirror of
https://github.com/GlueOps/autoglue.git
synced 2026-02-13 12:50:05 +01:00
chore: cleanup and route refactoring
Signed-off-by: allanice001 <allanice001@gmail.com>
This commit is contained in:
@@ -29,7 +29,7 @@ import (
|
||||
)
|
||||
|
||||
type DbBackupArgs struct {
|
||||
// kept in case you want to change retention or add dry-run later
|
||||
IntervalS int `json:"interval_seconds,omitempty"`
|
||||
}
|
||||
|
||||
type s3Scope struct {
|
||||
@@ -44,6 +44,13 @@ type encAWS struct {
|
||||
|
||||
func DbBackupWorker(db *gorm.DB, jobs *Jobs) archer.WorkerFn {
|
||||
return func(ctx context.Context, j job.Job) (any, error) {
|
||||
args := DbBackupArgs{IntervalS: 3600}
|
||||
_ = j.ParseArguments(&args)
|
||||
|
||||
if args.IntervalS <= 0 {
|
||||
args.IntervalS = 3600
|
||||
}
|
||||
|
||||
if err := DbBackup(ctx, db); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -53,7 +60,7 @@ func DbBackupWorker(db *gorm.DB, jobs *Jobs) archer.WorkerFn {
|
||||
queue = "db_backup_s3"
|
||||
}
|
||||
|
||||
next := time.Now().UTC().Add(1 * time.Hour)
|
||||
next := time.Now().Add(time.Duration(args.IntervalS) * time.Second)
|
||||
|
||||
payload := DbBackupArgs{}
|
||||
|
||||
@@ -73,7 +80,6 @@ func DbBackupWorker(db *gorm.DB, jobs *Jobs) archer.WorkerFn {
|
||||
|
||||
func DbBackup(ctx context.Context, db *gorm.DB) error {
|
||||
cfg, err := config.Load()
|
||||
log.Info().Err(err).Msg("loading config")
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
@@ -2,8 +2,8 @@ package bg
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -13,13 +13,16 @@ import (
|
||||
"github.com/glueops/autoglue/internal/models"
|
||||
"github.com/glueops/autoglue/internal/utils"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
"golang.org/x/crypto/ssh"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// ----- Public types -----
|
||||
|
||||
type BastionBootstrapArgs struct{}
|
||||
type BastionBootstrapArgs struct {
|
||||
IntervalS int `json:"interval_seconds,omitempty"`
|
||||
}
|
||||
|
||||
type BastionBootstrapFailure struct {
|
||||
ID uuid.UUID `json:"id"`
|
||||
@@ -39,11 +42,17 @@ type BastionBootstrapResult struct {
|
||||
|
||||
// ----- Worker -----
|
||||
|
||||
func BastionBootstrapWorker(db *gorm.DB) archer.WorkerFn {
|
||||
func BastionBootstrapWorker(db *gorm.DB, jobs *Jobs) archer.WorkerFn {
|
||||
return func(ctx context.Context, j job.Job) (any, error) {
|
||||
args := BastionBootstrapArgs{IntervalS: 120}
|
||||
jobID := j.ID
|
||||
start := time.Now()
|
||||
|
||||
_ = j.ParseArguments(&args)
|
||||
if args.IntervalS <= 0 {
|
||||
args.IntervalS = 120
|
||||
}
|
||||
|
||||
var servers []models.Server
|
||||
if err := db.
|
||||
Preload("SshKey").
|
||||
@@ -105,7 +114,7 @@ func BastionBootstrapWorker(db *gorm.DB) archer.WorkerFn {
|
||||
// 4) SSH + install docker
|
||||
host := net.JoinHostPort(*s.PublicIPAddress, "22")
|
||||
runCtx, cancel := context.WithTimeout(ctx, perHostTimeout)
|
||||
out, err := sshInstallDockerWithOutput(runCtx, host, s.SSHUser, []byte(privKey))
|
||||
out, err := sshInstallDockerWithOutput(runCtx, db, s, host, s.SSHUser, []byte(privKey))
|
||||
cancel()
|
||||
|
||||
if err != nil {
|
||||
@@ -147,9 +156,17 @@ func BastionBootstrapWorker(db *gorm.DB) archer.WorkerFn {
|
||||
Failures: failures,
|
||||
}
|
||||
|
||||
// log.Printf("[bastion] level=INFO job=%s step=finish processed=%d ready=%d failed=%d elapsed_ms=%d",
|
||||
// jobID, proc, ok, fail, res.ElapsedMs)
|
||||
log.Debug().Int("processed", proc).Int("ready", ok).Int("failed", fail).Msg("[bastion] reconcile tick ok")
|
||||
|
||||
next := time.Now().Add(time.Duration(args.IntervalS) * time.Second)
|
||||
_, _ = jobs.Enqueue(
|
||||
ctx,
|
||||
uuid.NewString(),
|
||||
"bootstrap_bastion",
|
||||
args,
|
||||
archer.WithScheduleTime(next),
|
||||
archer.WithMaxRetries(1),
|
||||
)
|
||||
return res, nil
|
||||
}
|
||||
}
|
||||
@@ -187,16 +204,24 @@ func logHostInfo(jobID string, s *models.Server, step, msg string, kv ...any) {
|
||||
// ----- SSH & command execution -----
|
||||
|
||||
// returns combined stdout/stderr so caller can log it on error
|
||||
func sshInstallDockerWithOutput(ctx context.Context, host, user string, privateKeyPEM []byte) (string, error) {
|
||||
func sshInstallDockerWithOutput(
|
||||
ctx context.Context,
|
||||
db *gorm.DB,
|
||||
s *models.Server,
|
||||
host, user string,
|
||||
privateKeyPEM []byte,
|
||||
) (string, error) {
|
||||
signer, err := ssh.ParsePrivateKey(privateKeyPEM)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("parse private key: %w", err)
|
||||
}
|
||||
|
||||
hkcb := makeDBHostKeyCallback(db, s)
|
||||
|
||||
config := &ssh.ClientConfig{
|
||||
User: user,
|
||||
Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)},
|
||||
HostKeyCallback: ssh.InsecureIgnoreHostKey(), // TODO: known_hosts verification
|
||||
HostKeyCallback: hkcb,
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
|
||||
@@ -494,3 +519,38 @@ func wrapSSHError(err error, output string) error {
|
||||
func sshEscape(s string) string {
|
||||
return fmt.Sprintf("%q", s)
|
||||
}
|
||||
|
||||
// makeDBHostKeyCallback returns a HostKeyCallback bound to a specific server row.
|
||||
// TOFU semantics:
|
||||
// - If s.SSHHostKey is empty: store the current key in DB and accept.
|
||||
// - If s.SSHHostKey is set: require exact match, else error (possible MITM/reinstall).
|
||||
func makeDBHostKeyCallback(db *gorm.DB, s *models.Server) ssh.HostKeyCallback {
|
||||
return func(hostname string, remote net.Addr, key ssh.PublicKey) error {
|
||||
algo := key.Type()
|
||||
enc := base64.StdEncoding.EncodeToString(key.Marshal())
|
||||
|
||||
// First-time connect: persist key (TOFU).
|
||||
if s.SSHHostKey == "" {
|
||||
if err := db.Model(&models.Server{}).
|
||||
Where("id = ? AND (ssh_host_key IS NULL or ssh_host_key = '')", s.ID).
|
||||
Updates(map[string]any{
|
||||
"ssh_host_key": enc,
|
||||
"ssh_host_key_algo": algo,
|
||||
}).Error; err != nil {
|
||||
return fmt.Errorf("store new host key for %s (%s): %w", hostname, s.ID, err)
|
||||
}
|
||||
|
||||
s.SSHHostKey = enc
|
||||
s.SSHHostKeyAlgo = algo
|
||||
return nil
|
||||
}
|
||||
|
||||
if s.SSHHostKeyAlgo != algo || s.SSHHostKey != enc {
|
||||
return fmt.Errorf(
|
||||
"host key mismatch for %s (server_id=%s, stored=%s/%s, got=%s/%s) - POSSIBLE MITM or host reinstalled",
|
||||
hostname, s.ID, s.SSHHostKeyAlgo, s.SSHHostKey, algo, enc,
|
||||
)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ func NewJobs(gdb *gorm.DB, dbUrl string) (*Jobs, error) {
|
||||
|
||||
c.Register(
|
||||
"bootstrap_bastion",
|
||||
BastionBootstrapWorker(gdb),
|
||||
BastionBootstrapWorker(gdb, jobs),
|
||||
archer.WithInstances(instances),
|
||||
archer.WithTimeout(time.Duration(timeoutSec)*time.Second),
|
||||
)
|
||||
@@ -100,6 +100,13 @@ func NewJobs(gdb *gorm.DB, dbUrl string) (*Jobs, error) {
|
||||
archer.WithInstances(1),
|
||||
archer.WithTimeout(15*time.Minute),
|
||||
)
|
||||
|
||||
c.Register(
|
||||
"dns_reconcile",
|
||||
DNSReconsileWorker(gdb, jobs),
|
||||
archer.WithInstances(1),
|
||||
archer.WithTimeout(2*time.Minute),
|
||||
)
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
|
||||
597
internal/bg/dns.go
Normal file
597
internal/bg/dns.go
Normal file
@@ -0,0 +1,597 @@
|
||||
package bg
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/dyaksa/archer"
|
||||
"github.com/dyaksa/archer/job"
|
||||
"github.com/glueops/autoglue/internal/handlers/dto"
|
||||
"github.com/glueops/autoglue/internal/models"
|
||||
"github.com/glueops/autoglue/internal/utils"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
"github.com/aws/aws-sdk-go-v2/config"
|
||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||
r53 "github.com/aws/aws-sdk-go-v2/service/route53"
|
||||
r53types "github.com/aws/aws-sdk-go-v2/service/route53/types"
|
||||
)
|
||||
|
||||
/************* args & small DTOs *************/
|
||||
|
||||
type DNSReconcileArgs struct {
|
||||
MaxDomains int `json:"max_domains,omitempty"`
|
||||
MaxRecords int `json:"max_records,omitempty"`
|
||||
IntervalS int `json:"interval_seconds,omitempty"`
|
||||
}
|
||||
|
||||
// TXT marker content (compact)
|
||||
type ownershipMarker struct {
|
||||
Ver string `json:"v"` // "ag1"
|
||||
Org string `json:"org"` // org UUID
|
||||
Rec string `json:"rec"` // record UUID
|
||||
Fp string `json:"fp"` // short fp (first 16 of sha256)
|
||||
}
|
||||
|
||||
// ExternalDNS poison owner id – MUST NOT match any real external-dns --txt-owner-id
|
||||
const externalDNSPoisonOwner = "autoglue-lock"
|
||||
|
||||
// ExternalDNS poison content – fake owner so real external-dns skips it.
|
||||
const externalDNSPoisonValue = "heritage=external-dns,external-dns/owner=" + externalDNSPoisonOwner + ",external-dns/resource=manual/autoglue"
|
||||
|
||||
/************* entrypoint worker *************/
|
||||
|
||||
func DNSReconsileWorker(db *gorm.DB, jobs *Jobs) archer.WorkerFn {
|
||||
return func(ctx context.Context, j job.Job) (any, error) {
|
||||
args := DNSReconcileArgs{MaxDomains: 25, MaxRecords: 100, IntervalS: 30}
|
||||
_ = j.ParseArguments(&args)
|
||||
|
||||
if args.MaxDomains <= 0 {
|
||||
args.MaxDomains = 25
|
||||
}
|
||||
if args.MaxRecords <= 0 {
|
||||
args.MaxRecords = 100
|
||||
}
|
||||
if args.IntervalS <= 0 {
|
||||
args.IntervalS = 30
|
||||
}
|
||||
|
||||
processedDomains, processedRecords, err := reconcileDNSOnce(ctx, db, args)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("[dns] reconcile tick failed")
|
||||
} else {
|
||||
log.Debug().
|
||||
Int("domains", processedDomains).
|
||||
Int("records", processedRecords).
|
||||
Msg("[dns] reconcile tick ok")
|
||||
}
|
||||
|
||||
next := time.Now().Add(time.Duration(args.IntervalS) * time.Second)
|
||||
_, _ = jobs.Enqueue(ctx, uuid.NewString(), "dns_reconcile", args,
|
||||
archer.WithScheduleTime(next),
|
||||
archer.WithMaxRetries(1),
|
||||
)
|
||||
|
||||
return map[string]any{
|
||||
"domains_processed": processedDomains,
|
||||
"records_processed": processedRecords,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
|
||||
/************* core tick *************/
|
||||
|
||||
func reconcileDNSOnce(ctx context.Context, db *gorm.DB, args DNSReconcileArgs) (int, int, error) {
|
||||
var domains []models.Domain
|
||||
|
||||
// 1) validate/backfill pending domains
|
||||
if err := db.
|
||||
Where("status = ?", "pending").
|
||||
Order("created_at ASC").
|
||||
Limit(args.MaxDomains).
|
||||
Find(&domains).Error; err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
domainsProcessed := 0
|
||||
for i := range domains {
|
||||
if err := processDomain(ctx, db, &domains[i]); err != nil {
|
||||
log.Error().Err(err).Str("domain", domains[i].DomainName).Msg("[dns] domain processing failed")
|
||||
} else {
|
||||
domainsProcessed++
|
||||
}
|
||||
}
|
||||
|
||||
// 2) apply pending record sets for ready domains
|
||||
var readyDomains []models.Domain
|
||||
if err := db.Where("status = ?", "ready").Find(&readyDomains).Error; err != nil {
|
||||
return domainsProcessed, 0, err
|
||||
}
|
||||
|
||||
recordsProcessed := 0
|
||||
for i := range readyDomains {
|
||||
n, err := processPendingRecordsForDomain(ctx, db, &readyDomains[i], args.MaxRecords)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Str("domain", readyDomains[i].DomainName).Msg("[dns] record processing failed")
|
||||
continue
|
||||
}
|
||||
recordsProcessed += n
|
||||
}
|
||||
|
||||
return domainsProcessed, recordsProcessed, nil
|
||||
}
|
||||
|
||||
/************* domain processing *************/
|
||||
|
||||
func processDomain(ctx context.Context, db *gorm.DB, d *models.Domain) error {
|
||||
orgID := d.OrganizationID
|
||||
|
||||
// 1) Load credential (org-guarded)
|
||||
var cred models.Credential
|
||||
if err := db.Where("id = ? AND organization_id = ?", d.CredentialID, orgID).First(&cred).Error; err != nil {
|
||||
return setDomainFailed(db, d, fmt.Errorf("credential not found: %w", err))
|
||||
}
|
||||
|
||||
// 2) Decrypt → dto.AWSCredential
|
||||
secret, err := utils.DecryptForOrg(orgID, cred.EncryptedData, cred.IV, cred.Tag, db)
|
||||
if err != nil {
|
||||
return setDomainFailed(db, d, fmt.Errorf("decrypt: %w", err))
|
||||
}
|
||||
var awsCred dto.AWSCredential
|
||||
if err := jsonUnmarshalStrict([]byte(secret), &awsCred); err != nil {
|
||||
return setDomainFailed(db, d, fmt.Errorf("secret decode: %w", err))
|
||||
}
|
||||
|
||||
// 3) Client
|
||||
r53c, _, err := newRoute53Client(ctx, awsCred)
|
||||
if err != nil {
|
||||
return setDomainFailed(db, d, err)
|
||||
}
|
||||
|
||||
// 4) Backfill zone id if missing
|
||||
zoneID := strings.TrimSpace(d.ZoneID)
|
||||
if zoneID == "" {
|
||||
zid, err := findHostedZoneID(ctx, r53c, d.DomainName)
|
||||
if err != nil {
|
||||
return setDomainFailed(db, d, fmt.Errorf("discover zone id: %w", err))
|
||||
}
|
||||
zoneID = zid
|
||||
d.ZoneID = zoneID
|
||||
}
|
||||
|
||||
// 5) Sanity: can fetch zone
|
||||
if _, err := r53c.GetHostedZone(ctx, &r53.GetHostedZoneInput{Id: aws.String(zoneID)}); err != nil {
|
||||
return setDomainFailed(db, d, fmt.Errorf("get hosted zone: %w", err))
|
||||
}
|
||||
|
||||
// 6) Mark ready
|
||||
d.Status = "ready"
|
||||
d.LastError = ""
|
||||
if err := db.Save(d).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setDomainFailed(db *gorm.DB, d *models.Domain, cause error) error {
|
||||
d.Status = "failed"
|
||||
d.LastError = truncateErr(cause.Error())
|
||||
_ = db.Save(d).Error
|
||||
return cause
|
||||
}
|
||||
|
||||
/************* record processing *************/
|
||||
|
||||
func processPendingRecordsForDomain(ctx context.Context, db *gorm.DB, d *models.Domain, max int) (int, error) {
|
||||
orgID := d.OrganizationID
|
||||
|
||||
// reload credential
|
||||
var cred models.Credential
|
||||
if err := db.Where("id = ? AND organization_id = ?", d.CredentialID, orgID).First(&cred).Error; err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
secret, err := utils.DecryptForOrg(orgID, cred.EncryptedData, cred.IV, cred.Tag, db)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var awsCred dto.AWSCredential
|
||||
if err := jsonUnmarshalStrict([]byte(secret), &awsCred); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
r53c, _, err := newRoute53Client(ctx, awsCred)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var records []models.RecordSet
|
||||
if err := db.
|
||||
Where("domain_id = ? AND status = ?", d.ID, "pending").
|
||||
Order("created_at ASC").
|
||||
Limit(max).
|
||||
Find(&records).Error; err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
applied := 0
|
||||
for i := range records {
|
||||
if err := applyRecord(ctx, db, r53c, d, &records[i]); err != nil {
|
||||
log.Error().Err(err).Str("rr", records[i].Name).Msg("[dns] apply record failed")
|
||||
_ = setRecordFailed(db, &records[i], err)
|
||||
continue
|
||||
}
|
||||
applied++
|
||||
}
|
||||
return applied, nil
|
||||
}
|
||||
|
||||
// core write + ownership + external-dns hardening
|
||||
|
||||
func applyRecord(ctx context.Context, db *gorm.DB, r53c *r53.Client, d *models.Domain, r *models.RecordSet) error {
|
||||
zoneID := strings.TrimSpace(d.ZoneID)
|
||||
if zoneID == "" {
|
||||
return errors.New("domain has no zone_id")
|
||||
}
|
||||
|
||||
rt := strings.ToUpper(r.Type)
|
||||
|
||||
// FQDN & marker
|
||||
fq := recordFQDN(r.Name, d.DomainName) // ends with "."
|
||||
mname := markerName(fq)
|
||||
expected := buildMarkerValue(d.OrganizationID.String(), r.ID.String(), r.Fingerprint)
|
||||
|
||||
// ---- ExternalDNS preflight ----
|
||||
extOwned, err := hasExternalDNSOwnership(ctx, r53c, zoneID, fq, rt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("external_dns_lookup: %w", err)
|
||||
}
|
||||
if extOwned {
|
||||
r.Owner = "external"
|
||||
_ = db.Save(r).Error
|
||||
return fmt.Errorf("ownership_conflict: external-dns claims %s; refusing to modify", strings.TrimSuffix(fq, "."))
|
||||
}
|
||||
|
||||
// ---- Autoglue ownership preflight via _autoglue.<fqdn> TXT ----
|
||||
markerVals, err := getMarkerTXTValues(ctx, r53c, zoneID, mname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marker lookup: %w", err)
|
||||
}
|
||||
hasForeignOwner := false
|
||||
hasOurExact := false
|
||||
for _, v := range markerVals {
|
||||
mk, ok := parseMarkerValue(v)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
switch {
|
||||
case mk.Org == d.OrganizationID.String() && mk.Rec == r.ID.String() && mk.Fp == shortFP(r.Fingerprint):
|
||||
hasOurExact = true
|
||||
case mk.Org != d.OrganizationID.String() || mk.Rec != r.ID.String():
|
||||
hasForeignOwner = true
|
||||
}
|
||||
}
|
||||
if hasForeignOwner {
|
||||
r.Owner = "external"
|
||||
_ = db.Save(r).Error
|
||||
return fmt.Errorf("ownership_conflict: marker for %s is owned by another controller; refusing to modify", strings.TrimSuffix(fq, "."))
|
||||
}
|
||||
|
||||
// Build RR change (UPSERT)
|
||||
rrChange := r53types.Change{
|
||||
Action: r53types.ChangeActionUpsert,
|
||||
ResourceRecordSet: &r53types.ResourceRecordSet{
|
||||
Name: aws.String(fq),
|
||||
Type: r53types.RRType(rt),
|
||||
},
|
||||
}
|
||||
|
||||
// Decode user values
|
||||
var userVals []string
|
||||
if len(r.Values) > 0 {
|
||||
if err := jsonUnmarshalStrict([]byte(r.Values), &userVals); err != nil {
|
||||
return fmt.Errorf("values decode: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Quote TXT values as required by Route53
|
||||
recs := make([]r53types.ResourceRecord, 0, len(userVals))
|
||||
for _, v := range userVals {
|
||||
v = strings.TrimSpace(v)
|
||||
if rt == "TXT" && !(strings.HasPrefix(v, `"`) && strings.HasSuffix(v, `"`)) {
|
||||
v = strconv.Quote(v)
|
||||
}
|
||||
recs = append(recs, r53types.ResourceRecord{Value: aws.String(v)})
|
||||
}
|
||||
rrChange.ResourceRecordSet.ResourceRecords = recs
|
||||
if r.TTL != nil {
|
||||
ttl := int64(*r.TTL)
|
||||
rrChange.ResourceRecordSet.TTL = aws.Int64(ttl)
|
||||
}
|
||||
|
||||
// Build marker TXT change (UPSERT)
|
||||
markerChange := r53types.Change{
|
||||
Action: r53types.ChangeActionUpsert,
|
||||
ResourceRecordSet: &r53types.ResourceRecordSet{
|
||||
Name: aws.String(mname),
|
||||
Type: r53types.RRTypeTxt,
|
||||
TTL: aws.Int64(300),
|
||||
ResourceRecords: []r53types.ResourceRecord{
|
||||
{Value: aws.String(strconv.Quote(expected))},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Build external-dns poison TXT changes
|
||||
poisonChanges := buildExternalDNSPoisonTXTChanges(fq, rt)
|
||||
|
||||
// Apply all in one batch (atomic-ish)
|
||||
changes := []r53types.Change{rrChange, markerChange}
|
||||
changes = append(changes, poisonChanges...)
|
||||
|
||||
_, err = r53c.ChangeResourceRecordSets(ctx, &r53.ChangeResourceRecordSetsInput{
|
||||
HostedZoneId: aws.String(zoneID),
|
||||
ChangeBatch: &r53types.ChangeBatch{Changes: changes},
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Success → mark ready & ownership
|
||||
r.Status = "ready"
|
||||
r.LastError = ""
|
||||
r.Owner = "autoglue"
|
||||
if err := db.Save(r).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
_ = hasOurExact // could be used to skip marker write in future
|
||||
return nil
|
||||
}
|
||||
|
||||
func setRecordFailed(db *gorm.DB, r *models.RecordSet, cause error) error {
|
||||
msg := truncateErr(cause.Error())
|
||||
r.Status = "failed"
|
||||
r.LastError = msg
|
||||
// classify ownership on conflict
|
||||
if strings.HasPrefix(msg, "ownership_conflict") {
|
||||
r.Owner = "external"
|
||||
} else if r.Owner == "" || r.Owner == "unknown" {
|
||||
r.Owner = "unknown"
|
||||
}
|
||||
_ = db.Save(r).Error
|
||||
return cause
|
||||
}
|
||||
|
||||
/************* AWS helpers *************/
|
||||
|
||||
func newRoute53Client(ctx context.Context, cred dto.AWSCredential) (*r53.Client, *aws.Config, error) {
|
||||
// Route53 is global, but config still wants a region
|
||||
region := strings.TrimSpace(cred.Region)
|
||||
if region == "" {
|
||||
region = "us-east-1"
|
||||
}
|
||||
cfg, err := config.LoadDefaultConfig(ctx,
|
||||
config.WithRegion(region),
|
||||
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
|
||||
cred.AccessKeyID, cred.SecretAccessKey, "",
|
||||
)),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return r53.NewFromConfig(cfg), &cfg, nil
|
||||
}
|
||||
|
||||
func findHostedZoneID(ctx context.Context, c *r53.Client, domain string) (string, error) {
|
||||
d := normalizeDomain(domain)
|
||||
out, err := c.ListHostedZonesByName(ctx, &r53.ListHostedZonesByNameInput{
|
||||
DNSName: aws.String(d),
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
for _, hz := range out.HostedZones {
|
||||
if strings.TrimSuffix(aws.ToString(hz.Name), ".") == d {
|
||||
return trimZoneID(aws.ToString(hz.Id)), nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("hosted zone not found for %q", d)
|
||||
}
|
||||
|
||||
func trimZoneID(id string) string {
|
||||
return strings.TrimPrefix(id, "/hostedzone/")
|
||||
}
|
||||
|
||||
func normalizeDomain(s string) string {
|
||||
s = strings.TrimSpace(strings.ToLower(s))
|
||||
return strings.TrimSuffix(s, ".")
|
||||
}
|
||||
|
||||
func recordFQDN(name, domain string) string {
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" || name == "@" {
|
||||
return normalizeDomain(domain) + "."
|
||||
}
|
||||
if strings.HasSuffix(name, ".") {
|
||||
return name
|
||||
}
|
||||
return fmt.Sprintf("%s.%s.", name, normalizeDomain(domain))
|
||||
}
|
||||
|
||||
/************* TXT marker / external-dns helpers *************/
|
||||
|
||||
func markerName(fqdn string) string {
|
||||
trimmed := strings.TrimSuffix(fqdn, ".")
|
||||
return "_autoglue." + trimmed + "."
|
||||
}
|
||||
|
||||
func shortFP(full string) string {
|
||||
if len(full) > 16 {
|
||||
return full[:16]
|
||||
}
|
||||
return full
|
||||
}
|
||||
|
||||
func buildMarkerValue(orgID, recID, fp string) string {
|
||||
return "v=ag1 org=" + orgID + " rec=" + recID + " fp=" + shortFP(fp)
|
||||
}
|
||||
|
||||
func parseMarkerValue(s string) (ownershipMarker, bool) {
|
||||
out := ownershipMarker{}
|
||||
fields := strings.Fields(s)
|
||||
if len(fields) < 4 {
|
||||
return out, false
|
||||
}
|
||||
kv := map[string]string{}
|
||||
for _, f := range fields {
|
||||
parts := strings.SplitN(f, "=", 2)
|
||||
if len(parts) == 2 {
|
||||
kv[parts[0]] = parts[1]
|
||||
}
|
||||
}
|
||||
if kv["v"] == "" || kv["org"] == "" || kv["rec"] == "" || kv["fp"] == "" {
|
||||
return out, false
|
||||
}
|
||||
out.Ver, out.Org, out.Rec, out.Fp = kv["v"], kv["org"], kv["rec"], kv["fp"]
|
||||
return out, true
|
||||
}
|
||||
|
||||
func getMarkerTXTValues(ctx context.Context, c *r53.Client, zoneID, marker string) ([]string, error) {
|
||||
return getTXTValues(ctx, c, zoneID, marker)
|
||||
}
|
||||
|
||||
// generic TXT fetcher
|
||||
func getTXTValues(ctx context.Context, c *r53.Client, zoneID, name string) ([]string, error) {
|
||||
out, err := c.ListResourceRecordSets(ctx, &r53.ListResourceRecordSetsInput{
|
||||
HostedZoneId: aws.String(zoneID),
|
||||
StartRecordName: aws.String(name),
|
||||
StartRecordType: r53types.RRTypeTxt,
|
||||
MaxItems: aws.Int32(1),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(out.ResourceRecordSets) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
rrset := out.ResourceRecordSets[0]
|
||||
if aws.ToString(rrset.Name) != name || rrset.Type != r53types.RRTypeTxt {
|
||||
return nil, nil
|
||||
}
|
||||
vals := make([]string, 0, len(rrset.ResourceRecords))
|
||||
for _, rr := range rrset.ResourceRecords {
|
||||
vals = append(vals, aws.ToString(rr.Value))
|
||||
}
|
||||
return vals, nil
|
||||
}
|
||||
|
||||
// detect external-dns-style ownership for this fqdn/type
|
||||
func hasExternalDNSOwnership(ctx context.Context, c *r53.Client, zoneID, fqdn, rrType string) (bool, error) {
|
||||
base := strings.TrimSuffix(fqdn, ".")
|
||||
candidates := []string{
|
||||
// with txtPrefix=extdns-, external-dns writes both:
|
||||
// extdns-<fqdn> and extdns-<rrtype-lc>-<fqdn>
|
||||
"extdns-" + base + ".",
|
||||
"extdns-" + strings.ToLower(rrType) + "-" + base + ".",
|
||||
}
|
||||
for _, name := range candidates {
|
||||
vals, err := getTXTValues(ctx, c, zoneID, name)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
for _, raw := range vals {
|
||||
v := strings.TrimSpace(raw)
|
||||
// strip surrounding quotes if present
|
||||
if len(v) >= 2 && v[0] == '"' && v[len(v)-1] == '"' {
|
||||
if unq, err := strconv.Unquote(v); err == nil {
|
||||
v = unq
|
||||
}
|
||||
}
|
||||
meta := parseExternalDNSMeta(v)
|
||||
if meta == nil {
|
||||
continue
|
||||
}
|
||||
if meta["heritage"] == "external-dns" &&
|
||||
meta["external-dns/owner"] != "" &&
|
||||
meta["external-dns/owner"] != externalDNSPoisonOwner {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// parseExternalDNSMeta parses the comma-separated external-dns TXT format into a small map.
|
||||
func parseExternalDNSMeta(v string) map[string]string {
|
||||
parts := strings.Split(v, ",")
|
||||
if len(parts) == 0 {
|
||||
return nil
|
||||
}
|
||||
meta := make(map[string]string, len(parts))
|
||||
for _, p := range parts {
|
||||
p = strings.TrimSpace(p)
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
kv := strings.SplitN(p, "=", 2)
|
||||
if len(kv) != 2 {
|
||||
continue
|
||||
}
|
||||
meta[kv[0]] = kv[1]
|
||||
}
|
||||
if len(meta) == 0 {
|
||||
return nil
|
||||
}
|
||||
return meta
|
||||
}
|
||||
|
||||
// build poison TXT records so external-dns thinks some *other* owner manages this
|
||||
func buildExternalDNSPoisonTXTChanges(fqdn, rrType string) []r53types.Change {
|
||||
base := strings.TrimSuffix(fqdn, ".")
|
||||
names := []string{
|
||||
"extdns-" + base + ".",
|
||||
"extdns-" + strings.ToLower(rrType) + "-" + base + ".",
|
||||
}
|
||||
val := strconv.Quote(externalDNSPoisonValue)
|
||||
changes := make([]r53types.Change, 0, len(names))
|
||||
for _, n := range names {
|
||||
changes = append(changes, r53types.Change{
|
||||
Action: r53types.ChangeActionUpsert,
|
||||
ResourceRecordSet: &r53types.ResourceRecordSet{
|
||||
Name: aws.String(n),
|
||||
Type: r53types.RRTypeTxt,
|
||||
TTL: aws.Int64(300),
|
||||
ResourceRecords: []r53types.ResourceRecord{
|
||||
{Value: aws.String(val)},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
return changes
|
||||
}
|
||||
|
||||
/************* misc utils *************/
|
||||
|
||||
func truncateErr(s string) string {
|
||||
const max = 2000
|
||||
if len(s) > max {
|
||||
return s[:max]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Strict unmarshal that treats "null" -> zero value correctly.
|
||||
func jsonUnmarshalStrict(b []byte, dst any) error {
|
||||
if len(b) == 0 {
|
||||
return errors.New("empty json")
|
||||
}
|
||||
return json.Unmarshal(b, dst)
|
||||
}
|
||||
Reference in New Issue
Block a user