From b4fbf510019dd86e83dfea68c1b0593968aa0f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Mon, 1 Jun 2026 20:22:26 +0200 Subject: [PATCH 1/3] refactor: img-cond->img_uncond --- src/guidance.cpp | 37 +++++----- src/guidance.h | 10 +-- src/stable-diffusion.cpp | 146 +++++++++++++++++++++------------------ 3 files changed, 101 insertions(+), 92 deletions(-) diff --git a/src/guidance.cpp b/src/guidance.cpp index 3cfc36792..7da833fc2 100644 --- a/src/guidance.cpp +++ b/src/guidance.cpp @@ -82,17 +82,18 @@ namespace sd::guidance { output.pred = pred_cond; if (has_tensor(input.pred_uncond)) { const sd::Tensor& pred_uncond = *input.pred_uncond; - if (has_tensor(input.pred_img_cond)) { - const sd::Tensor& pred_img_cond = *input.pred_img_cond; - output.pred = pred_uncond + - image_guidance_scale_ * (pred_img_cond - pred_uncond) + - guidance_scale_ * (pred_cond - pred_img_cond); + if (has_tensor(input.pred_img_uncond)) { + const sd::Tensor& pred_img_uncond = *input.pred_img_uncond; + output.pred = pred_img_uncond + + image_guidance_scale_ * (pred_uncond - pred_img_uncond) + + guidance_scale_ * (pred_cond - pred_uncond); + } else { output.pred = pred_uncond + guidance_scale_ * (pred_cond - pred_uncond); } - } else if (has_tensor(input.pred_img_cond)) { - const sd::Tensor& pred_img_cond = *input.pred_img_cond; - output.pred = pred_img_cond + guidance_scale_ * (pred_cond - pred_img_cond); + } else if (has_tensor(input.pred_img_uncond)) { + const sd::Tensor& pred_img_uncond = *input.pred_img_uncond; + output.pred = pred_img_uncond + guidance_scale_ * (pred_cond - pred_img_uncond); } return output; @@ -139,24 +140,24 @@ namespace sd::guidance { output.pred = pred_cond; if (has_tensor(input.pred_uncond)) { const sd::Tensor& pred_uncond = *input.pred_uncond; - if (has_tensor(input.pred_img_cond)) { - const sd::Tensor& pred_img_cond = *input.pred_img_cond; - output.pred = pred_uncond + - image_guidance_scale_ * (pred_img_cond - pred_uncond) + - guidance_scale_ * (pred_cond - pred_img_cond); + if (has_tensor(input.pred_img_uncond)) { + const sd::Tensor& pred_img_uncond = *input.pred_img_uncond; + output.pred = pred_img_uncond + + image_guidance_scale_ * (pred_uncond - pred_img_uncond) + + guidance_scale_ * (pred_cond - pred_uncond); } else { output.pred = pred_uncond + guidance_scale_ * (pred_cond - pred_uncond); } - } else if (has_tensor(input.pred_img_cond)) { - const sd::Tensor& pred_img_cond = *input.pred_img_cond; - output.pred = pred_img_cond + guidance_scale_ * (pred_cond - pred_img_cond); + } else if (has_tensor(input.pred_img_uncond)) { + const sd::Tensor& pred_img_uncond = *input.pred_img_uncond; + output.pred = pred_img_uncond + guidance_scale_ * (pred_cond - pred_img_uncond); } - if (!has_tensor(input.pred_uncond) && !has_tensor(input.pred_img_cond)) { + if (!has_tensor(input.pred_uncond) && !has_tensor(input.pred_img_uncond)) { return output; } const sd::Tensor* pred_uncond = input.pred_uncond; - const sd::Tensor* pred_img_cond = input.pred_img_cond; + const sd::Tensor* pred_img_cond = input.pred_img_uncond; sd::Tensor deltas = calculate_guidance_delta(pred_cond, pred_uncond, diff --git a/src/guidance.h b/src/guidance.h index c6c4e1966..26b79de9c 100644 --- a/src/guidance.h +++ b/src/guidance.h @@ -29,11 +29,11 @@ namespace sd::guidance { bool parse_skip_layer_guidance_uncond_arg(const char* extra_sample_args); struct GuidanceInput { - int step = 0; - size_t schedule_size = 0; - const sd::Tensor* pred_cond = nullptr; - const sd::Tensor* pred_uncond = nullptr; - const sd::Tensor* pred_img_cond = nullptr; + int step = 0; + size_t schedule_size = 0; + const sd::Tensor* pred_cond = nullptr; + const sd::Tensor* pred_uncond = nullptr; + const sd::Tensor* pred_img_uncond = nullptr; std::function()> predict_skip_layer; }; diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 239115fe3..ffb2fdb7e 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -1929,7 +1929,7 @@ class StableDiffusionGGML { sd::Tensor noise, const SDCondition& cond, const SDCondition& uncond, - const SDCondition& img_cond, + const SDCondition& img_uncond, const SDCondition& id_cond, const sd::Tensor& control_image, float control_strength, @@ -2054,7 +2054,7 @@ class StableDiffusionGGML { sd::Tensor cond_out; sd::Tensor uncond_out; - sd::Tensor img_cond_out; + sd::Tensor img_uncond_out; sd_sample::SampleStepCacheDispatcher step_cache(cache_runtime, step, sigma); std::vector> controls; DiffusionParams diffusion_params; @@ -2073,7 +2073,7 @@ class StableDiffusionGGML { &controls); static const std::vector> empty_ref_latents; - bool uncond_without_ref_latents = !img_cond.empty() && + bool uncond_without_ref_latents = !img_uncond.empty() && !ref_latents.empty() && sd_version_supports_ref_latent_img_cfg(version); @@ -2160,26 +2160,27 @@ class StableDiffusionGGML { uncond_skip_layers = &skip_layer_guidance.layers(); } uncond_out = run_condition(uncond, - nullptr, - uncond_skip_layers, - uncond_without_ref_latents ? &empty_ref_latents : nullptr); + uncond.c_concat.empty() ? nullptr : &uncond.c_concat, + uncond_skip_layers); if (uncond_out.empty()) { return {}; } } - if (!img_cond.empty()) { - img_cond_out = run_condition(img_cond, - cond.c_concat.empty() ? nullptr : &cond.c_concat); - if (img_cond_out.empty()) { + if (!img_uncond.empty()) { + img_uncond_out = run_condition(img_uncond, + img_uncond.c_concat.empty() ? nullptr : &img_uncond.c_concat, + nullptr, + uncond_without_ref_latents ? &empty_ref_latents : nullptr); + if (img_uncond_out.empty()) { return {}; } } sd::guidance::GuidanceInput guidance_input; - guidance_input.step = step; - guidance_input.schedule_size = sigmas.size(); - guidance_input.pred_cond = &cond_out; - guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out; - guidance_input.pred_img_cond = img_cond_out.empty() ? nullptr : &img_cond_out; + guidance_input.step = step; + guidance_input.schedule_size = sigmas.size(); + guidance_input.pred_cond = &cond_out; + guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out; + guidance_input.pred_img_uncond = img_uncond_out.empty() ? nullptr : &img_uncond_out; sd::guidance::GuiderOutput guided = primary_guidance.forward(guidance_input, {}); if (guided.pred.empty()) { @@ -3168,9 +3169,9 @@ struct GenerationRequest { int diffusion_model_down_factor = -1; int64_t seed = -1; bool use_uncond = false; - bool use_img_cond = false; + bool use_img_uncond = false; bool use_high_noise_uncond = false; - bool use_high_noise_img_cond = false; + bool use_high_noise_img_uncond = false; bool has_ref_images = false; const sd_cache_params_t* cache_params = nullptr; int batch_count = 1; @@ -3328,13 +3329,13 @@ struct GenerationRequest { static void resolve_guidance(sd_ctx_t* sd_ctx, sd_guidance_params_t* guidance, bool* use_uncond, - bool* use_img_cond, + bool* use_img_uncond, bool has_ref_images, const char* stage_name = nullptr) { GGML_ASSERT(guidance != nullptr); GGML_ASSERT(use_uncond != nullptr); - GGML_ASSERT(use_img_cond != nullptr); - // out_uncond + text_cfg_scale * (out_cond - out_img_cond) + image_cfg_scale * (out_img_cond - out_uncond) + GGML_ASSERT(use_img_uncond != nullptr); + // out_uncond + text_cfg_scale * (out_cond - out_img_uncond) + image_cfg_scale * (out_img_uncond - out_uncond) // img_cfg == txt_cfg means that img_cfg is not used bool img_cfg_was_unset = !std::isfinite(guidance->img_cfg); if (!std::isfinite(guidance->img_cfg)) { @@ -3348,13 +3349,12 @@ struct GenerationRequest { guidance->img_cfg = guidance->txt_cfg; } - if (guidance->txt_cfg != 1.f) { + if (guidance->img_cfg != guidance->txt_cfg) { *use_uncond = true; } - if (guidance->img_cfg != guidance->txt_cfg) { - *use_img_cond = true; - *use_uncond = true; + if (guidance->img_cfg != 1.f) { + *use_img_uncond = true; } if (guidance->txt_cfg < 1.f) { @@ -3373,12 +3373,12 @@ struct GenerationRequest { resolve_hires(); seed = resolve_seed(seed); - resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_cond, has_ref_images); + resolve_guidance(sd_ctx, &guidance, &use_uncond, &use_img_uncond, has_ref_images); if (sd_ctx->sd->high_noise_diffusion_model) { resolve_guidance(sd_ctx, &high_noise_guidance, &use_high_noise_uncond, - &use_high_noise_img_cond, + &use_high_noise_img_uncond, has_ref_images, "high noise: "); } @@ -3497,7 +3497,7 @@ struct SamplePlan { struct ImageGenerationLatents { sd::Tensor init_latent; sd::Tensor concat_latent; - sd::Tensor uncond_concat_latent; + sd::Tensor img_uncond_concat_latent; sd::Tensor audio_latent; sd::Tensor video_positions; sd::Tensor control_image; @@ -3820,7 +3820,7 @@ static int get_ltxav_num_audio_latents(int frames, int fps) { struct ImageGenerationEmbeds { SDCondition cond; SDCondition uncond; - SDCondition img_cond; + SDCondition img_uncond; SDCondition id_cond; }; @@ -3979,7 +3979,7 @@ static std::optional prepare_image_generation_latents(sd LOG_WARN("This model needs at least one reference image; using an empty reference"); ref_images.push_back(sd::zeros({request->width, request->height, 3, 1})); request->guidance.img_cfg = request->guidance.txt_cfg; - request->use_img_cond = false; + request->use_img_uncond = false; } if (!ref_images.empty()) { @@ -4032,7 +4032,7 @@ static std::optional prepare_image_generation_latents(sd } sd::Tensor concat_latent; - sd::Tensor uncond_concat_latent; + sd::Tensor img_uncond_concat_latent; if (sd_version_is_inpaint(sd_ctx->sd->version)) { sd::Tensor masked_init_latent; @@ -4060,8 +4060,8 @@ static std::optional prepare_image_generation_latents(sd request->height / request->vae_scale_factor}); mask = mask.permute({1, 3, 0, 2}).reshape({request->width / request->vae_scale_factor, request->height / request->vae_scale_factor, request->vae_scale_factor * request->vae_scale_factor, 1}); - concat_latent = sd::ops::concat(masked_init_latent, mask, 2); - uncond_concat_latent = sd::ops::concat(uncond_masked_init_latent, mask, 2); + concat_latent = sd::ops::concat(masked_init_latent, mask, 2); + img_uncond_concat_latent = sd::ops::concat(uncond_masked_init_latent, mask, 2); } else if (sd_ctx->sd->version == VERSION_FLEX_2) { concat_latent = sd::ops::concat(masked_init_latent, latent_mask, 2); if (!control_latent.empty()) { @@ -4070,16 +4070,16 @@ static std::optional prepare_image_generation_latents(sd concat_latent = sd::ops::concat(concat_latent, sd::Tensor::zeros_like(masked_init_latent), 2); } - uncond_concat_latent = sd::ops::concat(uncond_masked_init_latent, latent_mask, 2); - uncond_concat_latent = sd::ops::concat(uncond_concat_latent, sd::Tensor::zeros_like(masked_init_latent), 2); + img_uncond_concat_latent = sd::ops::concat(uncond_masked_init_latent, latent_mask, 2); + img_uncond_concat_latent = sd::ops::concat(img_uncond_concat_latent, sd::Tensor::zeros_like(masked_init_latent), 2); } else { // SD1.x SD2.x SDXL inpaint - concat_latent = sd::ops::concat(latent_mask, masked_init_latent, 2); - uncond_concat_latent = sd::ops::concat(latent_mask, uncond_masked_init_latent, 2); + concat_latent = sd::ops::concat(latent_mask, masked_init_latent, 2); + img_uncond_concat_latent = sd::ops::concat(latent_mask, uncond_masked_init_latent, 2); } } if (sd_version_is_unet_edit(sd_ctx->sd->version)) { - concat_latent = sd::ops::interpolate(ref_latents[0], init_latent.shape()); - uncond_concat_latent = sd::Tensor::zeros_like(concat_latent); + concat_latent = sd::ops::interpolate(ref_latents[0], init_latent.shape()); + img_uncond_concat_latent = sd::Tensor::zeros_like(concat_latent); } if (sd_ctx->sd->version == VERSION_FLUX_CONTROLS) { if (!control_latent.empty()) { @@ -4087,7 +4087,7 @@ static std::optional prepare_image_generation_latents(sd } else { concat_latent = sd::Tensor::zeros_like(init_latent); } - uncond_concat_latent = sd::Tensor::zeros_like(concat_latent); + img_uncond_concat_latent = sd::Tensor::zeros_like(concat_latent); } if (sd_img_gen_params->init_image.data != nullptr || sd_img_gen_params->ref_images_count > 0) { @@ -4096,12 +4096,12 @@ static std::optional prepare_image_generation_latents(sd } ImageGenerationLatents latents; - latents.init_latent = std::move(init_latent); - latents.concat_latent = std::move(concat_latent); - latents.uncond_concat_latent = std::move(uncond_concat_latent); - latents.control_image = std::move(control_image_tensor); - latents.ref_images = std::move(ref_images); - latents.ref_latents = std::move(ref_latents); + latents.init_latent = std::move(init_latent); + latents.concat_latent = std::move(concat_latent); + latents.img_uncond_concat_latent = std::move(img_uncond_concat_latent); + latents.control_image = std::move(control_image_tensor); + latents.ref_images = std::move(ref_images); + latents.ref_latents = std::move(ref_latents); if (sd_version_is_inpaint(sd_ctx->sd->version)) { latent_mask = sd::ops::max_pool_2d(latent_mask, @@ -4135,7 +4135,7 @@ static std::optional prepare_image_generation_embeds(sd_c cond.c_concat = latents->concat_latent; // TODO: optimize } - bool use_ref_latent_img_cfg = request->use_img_cond && + bool use_ref_latent_img_cfg = request->use_img_uncond && !latents->ref_images.empty() && sd_version_supports_ref_latent_img_cfg(sd_ctx->sd->version); @@ -4152,24 +4152,32 @@ static std::optional prepare_image_generation_embeds(sd_c uncond = sd_ctx->sd->cond_stage_model->get_learned_condition(sd_ctx->sd->n_threads, condition_params); if (uncond.c_concat.empty()) { - uncond.c_concat = latents->uncond_concat_latent; // TODO: optimize + uncond.c_concat = latents->concat_latent; // TODO: optimize } } - SDCondition img_cond; - if (request->use_img_cond) { - if (use_ref_latent_img_cfg) { - img_cond = uncond; - - std::vector> empty_ref_images; - condition_params.ref_images = &empty_ref_images; - uncond = sd_ctx->sd->cond_stage_model->get_learned_condition(sd_ctx->sd->n_threads, - condition_params); - if (uncond.c_concat.empty()) { - uncond.c_concat = latents->uncond_concat_latent; // TODO: optimize - } + SDCondition img_uncond; + if (request->use_img_uncond) { + if ((request->use_uncond || request->use_high_noise_uncond) && (latents->ref_images.empty() || !use_ref_latent_img_cfg)) { + img_uncond = SDCondition(uncond.c_crossattn, uncond.c_vector, latents->img_uncond_concat_latent); } else { - img_cond = SDCondition(uncond.c_crossattn, uncond.c_vector, cond.c_concat); + bool zero_out_masked = false; + if (sd_version_is_sdxl(sd_ctx->sd->version) && + request->negative_prompt.empty() && + !sd_ctx->sd->is_using_edm_v_parameterization) { + zero_out_masked = true; + } + condition_params.text = request->negative_prompt; + condition_params.zero_out_masked = zero_out_masked; + if (use_ref_latent_img_cfg) { + std::vector> empty_ref_images; + condition_params.ref_images = &empty_ref_images; + } + img_uncond = sd_ctx->sd->cond_stage_model->get_learned_condition(sd_ctx->sd->n_threads, + condition_params); + if (img_uncond.c_concat.empty()) { + img_uncond.c_concat = latents->img_uncond_concat_latent; // TODO: optimize + } } } @@ -4181,10 +4189,10 @@ static std::optional prepare_image_generation_embeds(sd_c } ImageGenerationEmbeds embeds; - embeds.img_cond = std::move(img_cond); - embeds.cond = std::move(cond); - embeds.uncond = std::move(uncond); - embeds.id_cond = std::move(id_cond); + embeds.img_uncond = std::move(img_uncond); + embeds.cond = std::move(cond); + embeds.uncond = std::move(uncond); + embeds.id_cond = std::move(id_cond); return embeds; } @@ -4464,7 +4472,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s std::move(noise), embeds.cond, embeds.uncond, - embeds.img_cond, + embeds.img_uncond, embeds.id_cond, latents.control_image, request.control_strength, @@ -4584,7 +4592,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s std::move(noise), embeds.cond, embeds.uncond, - embeds.img_cond, + embeds.img_uncond, embeds.id_cond, latents.control_image, request.control_strength, @@ -5288,7 +5296,7 @@ SD_API bool generate_video(sd_ctx_t* sd_ctx, std::move(noise), embeds.cond, request.use_high_noise_uncond ? embeds.uncond : SDCondition(), - embeds.img_cond, + embeds.img_uncond, embeds.id_cond, sd::Tensor(), 0.f, @@ -5334,7 +5342,7 @@ SD_API bool generate_video(sd_ctx_t* sd_ctx, std::move(noise), embeds.cond, request.use_uncond ? embeds.uncond : SDCondition(), - embeds.img_cond, + embeds.img_uncond, embeds.id_cond, sd::Tensor(), 0.f, @@ -5478,7 +5486,7 @@ SD_API bool generate_video(sd_ctx_t* sd_ctx, std::move(noise), embeds.cond, hires_request.use_uncond ? embeds.uncond : SDCondition(), - embeds.img_cond, + embeds.img_uncond, embeds.id_cond, sd::Tensor(), 0.f, From acb881323f5b0d5bc92dd16b0ca84a4709612121 Mon Sep 17 00:00:00 2001 From: leejet Date: Tue, 2 Jun 2026 22:59:50 +0800 Subject: [PATCH 2/3] align APG and CFG++ with img-uncond CFG --- src/guidance.cpp | 24 ++++++++++++------------ src/stable-diffusion.cpp | 4 +++- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/guidance.cpp b/src/guidance.cpp index 7da833fc2..2cb305a59 100644 --- a/src/guidance.cpp +++ b/src/guidance.cpp @@ -109,20 +109,20 @@ namespace sd::guidance { static sd::Tensor calculate_guidance_delta(const sd::Tensor& pred_cond, const sd::Tensor* pred_uncond, - const sd::Tensor* pred_img_cond, + const sd::Tensor* pred_img_uncond, float guidance_scale, float image_guidance_scale) { - if (pred_img_cond != nullptr) { + if (pred_img_uncond != nullptr) { if (pred_uncond != nullptr && guidance_scale == 1.0f) { - return *pred_img_cond - *pred_uncond; + return *pred_uncond - *pred_img_uncond; } if (pred_uncond != nullptr) { return pred_cond + - (*pred_uncond * (1.0f - image_guidance_scale) + - *pred_img_cond * (image_guidance_scale - guidance_scale)) / + (*pred_uncond * (image_guidance_scale - guidance_scale) + + *pred_img_uncond * (1.0f - image_guidance_scale)) / (guidance_scale - 1.0f); } - return pred_cond - *pred_img_cond; + return pred_cond - *pred_img_uncond; } return pred_cond - *pred_uncond; } @@ -156,12 +156,12 @@ namespace sd::guidance { return output; } - const sd::Tensor* pred_uncond = input.pred_uncond; - const sd::Tensor* pred_img_cond = input.pred_img_uncond; + const sd::Tensor* pred_uncond = input.pred_uncond; + const sd::Tensor* pred_img_uncond = input.pred_img_uncond; sd::Tensor deltas = calculate_guidance_delta(pred_cond, pred_uncond, - pred_img_cond, + pred_img_uncond, guidance_scale_, image_guidance_scale_); if (params_.momentum != 0.0f) { @@ -203,11 +203,11 @@ namespace sd::guidance { if (pred_uncond != nullptr) { if (guidance_scale_ != 1.0f) { output.pred = pred_cond + (guidance_scale_ - 1.0f) * deltas; - } else if (pred_img_cond != nullptr) { + } else if (pred_img_uncond != nullptr) { output.pred = pred_cond + (image_guidance_scale_ - 1.0f) * deltas; } - } else if (pred_img_cond != nullptr) { - output.pred = *pred_img_cond + guidance_scale_ * deltas; + } else if (pred_img_uncond != nullptr) { + output.pred = *pred_img_uncond + guidance_scale_ * deltas; } return output; diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 016de707e..d073b30ed 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -2223,7 +2223,9 @@ class StableDiffusionGGML { sd::guidance::GuiderOutput output; output.pred = denoised; if (needs_uncond_denoised) { - const sd::Tensor& base_uncond = !uncond_out.empty() ? uncond_out : cond_out; + const sd::Tensor& base_uncond = !img_uncond_out.empty() + ? img_uncond_out + : (!uncond_out.empty() ? uncond_out : cond_out); output.pred_uncond = base_uncond * c_out + x * c_skip; } if (cache_runtime.spectrum_enabled) { From e0554204d9d23e60697f107bbcfee1e68a2f65d3 Mon Sep 17 00:00:00 2001 From: leejet Date: Wed, 3 Jun 2026 22:46:37 +0800 Subject: [PATCH 3/3] set default img_cfg to 1.f --- src/stable-diffusion.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index d073b30ed..57bd57c6a 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -3365,18 +3365,22 @@ struct GenerationRequest { GGML_ASSERT(guidance != nullptr); GGML_ASSERT(use_uncond != nullptr); GGML_ASSERT(use_img_uncond != nullptr); - // out_uncond + text_cfg_scale * (out_cond - out_img_uncond) + image_cfg_scale * (out_img_uncond - out_uncond) - // img_cfg == txt_cfg means that img_cfg is not used - bool img_cfg_was_unset = !std::isfinite(guidance->img_cfg); - if (!std::isfinite(guidance->img_cfg)) { - guidance->img_cfg = guidance->txt_cfg; + // out_img_uncond + text_cfg_scale * (out_cond - out_uncond) + image_cfg_scale * (out_uncond - out_img_uncond) + // -> text_cfg_scale * out_cond + (image_cfg_scale - text_cfg_scale) * out_uncond + (1 - image_cfg_scale) * out_img_uncond + // out_cond : prompt, image latent + // out_uncond : negative prompt, image latent + // out_img_uncond : negative prompt, zero image latent + // image_cfg_scale == 1 reduces 3-cond CFG to 2-cond CFG. + bool img_cfg_was_set = std::isfinite(guidance->img_cfg); + if (!img_cfg_was_set) { + guidance->img_cfg = 1.f; } if (!sd_version_supports_img_cfg(sd_ctx->sd->version, has_ref_images)) { - if (!img_cfg_was_unset && guidance->img_cfg != guidance->txt_cfg) { - LOG_WARN("2-conditioning CFG is not supported with this model, disabling it for better performance"); + if (img_cfg_was_set && guidance->img_cfg != 1.f) { + LOG_WARN("3-conditioning CFG is not supported with this model, disabling it for better performance"); } - guidance->img_cfg = guidance->txt_cfg; + guidance->img_cfg = 1.f; } if (guidance->img_cfg != guidance->txt_cfg) {