Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docs/comparison/orig_cat.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/orig_garden.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/orig_phonograph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse1pct_cat.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse1pct_garden.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse1pct_phonograph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse3pct_cat.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse3pct_garden.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse3pct_phonograph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse6pct_cat.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse6pct_garden.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/comparison/rmse6pct_phonograph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
30 changes: 17 additions & 13 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ struct SDCliParams {
bool verbose = false;
bool canny_preprocess = false;
bool convert_name = false;
float rmse_threshold = 0.0f;

preview_t preview_method = PREVIEW_NONE;
int preview_interval = 1;
Expand Down Expand Up @@ -88,6 +89,16 @@ struct SDCliParams {
&output_begin_idx},
};

options.float_options = {
{"",
"--rmse",
"maximum relative RMSE per tensor for auto mixed-precision quantization in convert mode "
"(e.g. 0.03 = 3%). Sweeps from coarsest to finest quant type and picks the coarsest "
"that stays under this budget. --type sets the quality ceiling (default: f16). "
"Explicit --tensor-type-rules take priority over the RMSE sweep.",
&rmse_threshold},
};

options.bool_options = {
{"",
"--canny",
Expand Down Expand Up @@ -601,23 +612,16 @@ int main(int argc, const char* argv[]) {
LOG_DEBUG("%s", gen_params.to_string().c_str());

if (cli_params.mode == CONVERT) {
bool success = convert(ctx_params.model_path.c_str(),
ctx_params.vae_path.c_str(),
sd_ctx_params_t sd_params = ctx_params.to_sd_ctx_params_t(false, false, false);
bool success = convert(&sd_params,
cli_params.output_path.c_str(),
ctx_params.wtype,
ctx_params.tensor_type_rules.c_str(),
cli_params.convert_name);
cli_params.convert_name,
cli_params.rmse_threshold);
if (!success) {
LOG_ERROR("convert '%s'/'%s' to '%s' failed",
ctx_params.model_path.c_str(),
ctx_params.vae_path.c_str(),
cli_params.output_path.c_str());
LOG_ERROR("convert to '%s' failed", cli_params.output_path.c_str());
return 1;
} else {
LOG_INFO("convert '%s'/'%s' to '%s' success",
ctx_params.model_path.c_str(),
ctx_params.vae_path.c_str(),
cli_params.output_path.c_str());
LOG_INFO("convert to '%s' success", cli_params.output_path.c_str());
return 0;
}
}
Expand Down
6 changes: 6 additions & 0 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,10 @@ ArgOptions SDContextParams::get_options() {
"--mmap",
"whether to memory-map model",
true, &enable_mmap},
{"-ll",
"--lazy-load",
"staged loading: evict text encoders from RAM after encoding, diffusion model after sampling, VAE after decoding (forces --mmap on)",
true, &lazy_loading},
{"",
"--control-net-cpu",
"keep controlnet in cpu (for low vram)",
Expand Down Expand Up @@ -697,6 +701,7 @@ std::string SDContextParams::to_string() const {
<< " backend: \"" << backend << "\",\n"
<< " params_backend: \"" << params_backend << "\",\n"
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
<< " lazy_loading: " << (lazy_loading ? "true" : "false") << ",\n"
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
<< " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
Expand Down Expand Up @@ -773,6 +778,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
chroma_t5_mask_pad,
qwen_image_zero_cond_t,
max_vram,
lazy_loading,
backend.c_str(),
params_backend.c_str(),
};
Expand Down
1 change: 1 addition & 0 deletions examples/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ struct SDContextParams {
std::string backend;
std::string params_backend;
bool enable_mmap = false;
bool lazy_loading = false;
bool control_net_cpu = false;
bool clip_on_cpu = false;
bool vae_on_cpu = false;
Expand Down
9 changes: 4 additions & 5 deletions include/stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ typedef struct {
int chroma_t5_mask_pad;
bool qwen_image_zero_cond_t;
float max_vram; // GiB budget for graph-cut segmented param offload (0 = disabled, -1 = auto free VRAM minus 1 GiB)
bool lazy_loading; // staged load: encode text, evict text encoder, load diffusion, evict, load VAE, decode
const char* backend;
const char* params_backend;
} sd_ctx_params_t;
Expand Down Expand Up @@ -463,12 +464,10 @@ SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,

SD_API int get_upscale_factor(upscaler_ctx_t* upscaler_ctx);

SD_API bool convert(const char* input_path,
const char* vae_path,
SD_API bool convert(const sd_ctx_params_t* params,
const char* output_path,
enum sd_type_t output_type,
const char* tensor_type_rules,
bool convert_name);
bool convert_name,
float rmse_threshold);

SD_API bool preprocess_canny(sd_image_t image,
float high_threshold,
Expand Down
13 changes: 13 additions & 0 deletions src/conditioner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ struct Conditioner {
const ConditionerParams& conditioner_params) = 0;
virtual void alloc_params_buffer() = 0;
virtual void free_params_buffer() = 0;
virtual void free_compute_buffer() {}
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) = 0;
virtual size_t get_params_buffer_size() = 0;
virtual void set_max_graph_vram_bytes(size_t max_vram_bytes) {}
Expand Down Expand Up @@ -805,6 +806,18 @@ struct SD3CLIPEmbedder : public Conditioner {
}
}

// Calls free_compute_buffer() on each of the clip_l, clip_g and t5 members
// that is currently set, in that order. Members that are null are skipped.
void free_compute_buffer() override {
    // Shared null-check-then-release step, applied to one member at a time.
    auto release_if_present = [](auto& runner) {
        if (runner) {
            runner->free_compute_buffer();
        }
    };

    release_if_present(clip_l);
    release_if_present(clip_g);
    release_if_present(t5);
}

size_t get_params_buffer_size() override {
size_t buffer_size = 0;
if (clip_l) {
Expand Down
Loading