Skip to content

Commit 7b2f497

Browse files
authored
Merge pull request #121 from LNVPS/copilot/fix-deleted-vms-issue
Fix deleted VMs: race condition in cleanup and invoice/payment guards for deleted VMs
2 parents 65f0c64 + a6c0b5a commit 7b2f497

5 files changed

Lines changed: 157 additions & 24 deletions

File tree

lnvps_api/src/api/routes.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,6 +1308,9 @@ async fn get_user_vm(auth: &Nip98Auth, this: &RouterState, id: u64) -> Result<(u
13081308
if uid != vm.user_id {
13091309
return Err(ApiError::new("VM does not belong to you"));
13101310
}
1311+
if vm.deleted {
1312+
return Err(ApiError::new("VM not found"));
1313+
}
13111314
Ok((uid, vm))
13121315
}
13131316

lnvps_api/src/provisioner/rollback_tests.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -496,12 +496,11 @@ mod tests {
496496
provisioner.delete_vm(vm_id).await?;
497497

498498
// Verify complete cleanup:
499-
// Note: MockDb hard-deletes VMs, so we can't verify VM.deleted flag
500-
// In production, the VM would be soft-deleted (deleted = true)
499+
// VM should be soft-deleted (deleted = true), matching production MySQL behavior
501500

502-
// 1. VM should no longer be accessible (MockDb hard-delete)
503-
let vm_get_result = db.get_vm(vm_id).await;
504-
assert!(vm_get_result.is_err(), "VM should be deleted from MockDb");
501+
// 1. VM should be soft-deleted
502+
let vm_after = db.get_vm(vm_id).await?;
503+
assert!(vm_after.deleted, "VM should be deleted from MockDb");
505504

506505
// 2. IPs should be soft-deleted with refs cleared
507506
let ips_after = db.list_vm_ip_assignments(vm_id).await?;
@@ -667,9 +666,9 @@ mod tests {
667666
let result = provisioner.delete_vm(vm_id).await;
668667
assert!(result.is_ok(), "Delete should succeed");
669668

670-
// Verify VM is deleted (MockDb hard-deletes)
671-
let vm_get_result = db.get_vm(vm_id).await;
672-
assert!(vm_get_result.is_err(), "VM should be deleted from MockDb");
669+
// Verify VM is soft-deleted (matching production MySQL behavior)
670+
let vm_after = db.get_vm(vm_id).await?;
671+
assert!(vm_after.deleted, "VM should be deleted from MockDb");
673672

674673
// Verify IP assignments are marked as deleted
675674
let ips_after = db.list_vm_ip_assignments(vm_id).await?;

lnvps_api/src/worker.rs

Lines changed: 131 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -487,14 +487,35 @@ impl Worker {
487487

488488
// Process deletions first
489489
for vm in vms_to_delete {
490-
info!("Deleting unpaid VM {}", vm.id);
491-
if let Err(e) = self.provisioner.delete_vm(vm.id).await {
492-
error!("Failed to delete unpaid VM {}: {}", vm.id, e);
493-
self.queue_admin_notification(
494-
format!("Failed to delete unpaid VM {}:\n{}", vm.id, e),
495-
Some(format!("VM {} Deletion Failed", vm.id)),
496-
)
497-
.await
490+
// Re-read the VM from the database to guard against a race condition where a
491+
// payment was confirmed between the initial list_vms() snapshot and now.
492+
// Only proceed with deletion if the VM is still in the unpaid (new) state.
493+
match self.db.get_vm(vm.id).await {
494+
Ok(current_vm) if current_vm.created == current_vm.expires => {
495+
info!("Deleting unpaid VM {}", vm.id);
496+
if let Err(e) = self.provisioner.delete_vm(vm.id).await {
497+
error!("Failed to delete unpaid VM {}: {}", vm.id, e);
498+
self.queue_admin_notification(
499+
format!("Failed to delete unpaid VM {}:\n{}", vm.id, e),
500+
Some(format!("VM {} Deletion Failed", vm.id)),
501+
)
502+
.await
503+
}
504+
}
505+
Ok(_) => {
506+
info!(
507+
"VM {} was paid since last check, skipping deletion",
508+
vm.id
509+
);
510+
}
511+
Err(e) => {
512+
error!("Failed to re-read VM {} before deletion: {}", vm.id, e);
513+
self.queue_admin_notification(
514+
format!("Failed to re-read VM {} before deletion:\n{}", vm.id, e),
515+
Some(format!("VM {} Pre-Deletion Read Failed", vm.id)),
516+
)
517+
.await
518+
}
498519
}
499520
}
500521

@@ -2350,3 +2371,105 @@ impl Worker {
23502371
Ok(())
23512372
}
23522373
}
2374+
2375+
#[cfg(test)]
2376+
mod tests {
2377+
use super::*;
2378+
use crate::mocks::{MockDnsServer, MockNode};
2379+
use crate::settings::mock_settings;
2380+
use crate::provisioner::LNVpsProvisioner;
2381+
use lnvps_api_common::{MockDb, MockExchangeRate};
2382+
use lnvps_db::{LNVpsDbBase, UserSshKey, Vm};
2383+
2384+
async fn setup_worker(db: Arc<MockDb>) -> Result<Worker> {
2385+
let settings = mock_settings();
2386+
let node = Arc::new(MockNode::default());
2387+
let rates = Arc::new(MockExchangeRate::new());
2388+
let dns = MockDnsServer::new();
2389+
let provisioner = Arc::new(LNVpsProvisioner::new(
2390+
settings.clone(),
2391+
db.clone(),
2392+
node,
2393+
rates,
2394+
Some(Arc::new(dns)),
2395+
));
2396+
let cache = VmStateCache::new();
2397+
Worker::new(db, provisioner, &settings, cache, None).await
2398+
}
2399+
2400+
async fn add_vm_with_state(
2401+
db: &Arc<MockDb>,
2402+
created: DateTime<Utc>,
2403+
expires: DateTime<Utc>,
2404+
) -> Result<Vm> {
2405+
let pubkey: [u8; 32] = rand::random();
2406+
let user_id = db.upsert_user(&pubkey).await?;
2407+
let ssh_key_id = db
2408+
.insert_user_ssh_key(&UserSshKey {
2409+
id: 0,
2410+
name: "test".to_string(),
2411+
user_id,
2412+
created: Utc::now(),
2413+
key_data: "ssh-rsa AAA==".into(),
2414+
})
2415+
.await?;
2416+
let vm = Vm {
2417+
id: 0,
2418+
host_id: 1,
2419+
user_id,
2420+
image_id: 1,
2421+
template_id: Some(1),
2422+
custom_template_id: None,
2423+
ssh_key_id,
2424+
created,
2425+
expires,
2426+
disk_id: 1,
2427+
mac_address: "ff:ff:ff:ff:ff:ff".to_string(),
2428+
deleted: false,
2429+
ref_code: None,
2430+
auto_renewal_enabled: false,
2431+
disabled: false,
2432+
};
2433+
let vm_id = db.insert_vm(&vm).await?;
2434+
Ok(db.get_vm(vm_id).await?)
2435+
}
2436+
2437+
/// An unpaid VM (created == expires) that is older than 1 hour must be deleted by check_vms.
2438+
#[tokio::test]
2439+
async fn test_check_vms_deletes_unpaid_vm_after_one_hour() -> Result<()> {
2440+
let db = Arc::new(MockDb::default());
2441+
let old = Utc::now().sub(TimeDelta::hours(2));
2442+
let vm = add_vm_with_state(&db, old, old).await?;
2443+
let vm_id = vm.id;
2444+
2445+
let worker = setup_worker(db.clone()).await?;
2446+
worker.check_vms().await?;
2447+
2448+
// VM should be soft-deleted
2449+
let vms = db.vms.lock().await;
2450+
let deleted = vms.get(&vm_id).map(|v| v.deleted).unwrap_or(false);
2451+
assert!(deleted, "Unpaid VM older than 1 hour should be deleted");
2452+
Ok(())
2453+
}
2454+
2455+
/// An unpaid VM that was created less than 1 hour ago must NOT be deleted by check_vms.
2456+
#[tokio::test]
2457+
async fn test_check_vms_skips_unpaid_vm_within_one_hour() -> Result<()> {
2458+
let db = Arc::new(MockDb::default());
2459+
let recent = Utc::now().sub(TimeDelta::minutes(30));
2460+
let vm = add_vm_with_state(&db, recent, recent).await?;
2461+
let vm_id = vm.id;
2462+
2463+
let worker = setup_worker(db.clone()).await?;
2464+
worker.check_vms().await?;
2465+
2466+
// VM should still be present and not deleted
2467+
let vms = db.vms.lock().await;
2468+
let deleted = vms.get(&vm_id).map(|v| v.deleted).unwrap_or(true);
2469+
assert!(
2470+
!deleted,
2471+
"Unpaid VM younger than 1 hour should not be deleted"
2472+
);
2473+
Ok(())
2474+
}
2475+
}

lnvps_api_common/src/mock.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,9 @@ impl LNVpsDbBase for MockDb {
661661

662662
async fn delete_vm(&self, vm_id: u64) -> DbResult<()> {
663663
let mut vms = self.vms.lock().await;
664-
vms.remove(&vm_id);
664+
if let Some(vm) = vms.get_mut(&vm_id) {
665+
vm.deleted = true;
666+
}
665667
Ok(())
666668
}
667669

@@ -834,8 +836,10 @@ impl LNVpsDbBase for MockDb {
834836
p.is_paid = true;
835837
p.paid_at = Some(Utc::now());
836838
}
837-
if let Some(v) = v.get_mut(&payment.vm_id) {
838-
v.expires = v.expires.add(TimeDelta::seconds(payment.time_value as i64));
839+
if let Some(vm) = v.get_mut(&payment.vm_id) {
840+
// Un-delete the VM if it was deleted (e.g. auto-cleaned up before payment arrived)
841+
vm.deleted = false;
842+
vm.expires = vm.expires.add(TimeDelta::seconds(payment.time_value as i64));
839843
}
840844
Ok(())
841845
}

lnvps_db/src/mysql.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -799,11 +799,15 @@ impl LNVpsDbBase for LNVpsDbMysql {
799799
.execute(&mut *tx)
800800
.await?;
801801

802-
sqlx::query("update vm set expires = TIMESTAMPADD(SECOND, ?, expires) where id = ?")
803-
.bind(vm_payment.time_value)
804-
.bind(vm_payment.vm_id)
805-
.execute(&mut *tx)
806-
.await?;
802+
// Un-delete the VM if it was deleted (e.g. auto-cleaned up before payment arrived)
803+
// and extend its expiry. This handles payment methods with longer timeouts.
804+
sqlx::query(
805+
"update vm set expires = TIMESTAMPADD(SECOND, ?, expires), deleted = 0 where id = ?",
806+
)
807+
.bind(vm_payment.time_value)
808+
.bind(vm_payment.vm_id)
809+
.execute(&mut *tx)
810+
.await?;
807811

808812
tx.commit().await?;
809813
Ok(())

0 commit comments

Comments
 (0)