diff --git a/expose.h b/expose.h index 7e8b8252b88..0c98624b369 100644 --- a/expose.h +++ b/expose.h @@ -197,11 +197,14 @@ struct sd_generation_inputs const bool flip_mask = false; const float denoising_strength = 0.0f; const float cfg_scale = 0.0f; + const float distilled_guidance = -1.0f; + const int shifted_timestep = 0; const int sample_steps = 0; const int width = 0; const int height = 0; const int seed = 0; const char * sample_method = nullptr; + const char * scheduler = nullptr; const int clip_skip = -1; const int vid_req_frames = 1; const int vid_req_avi = 0; diff --git a/koboldcpp.py b/koboldcpp.py index 2a16076b8dd..8ad73c0e513 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -311,11 +311,14 @@ class sd_generation_inputs(ctypes.Structure): ("flip_mask", ctypes.c_bool), ("denoising_strength", ctypes.c_float), ("cfg_scale", ctypes.c_float), + ("distilled_guidance", ctypes.c_float), + ("shifted_timestep", ctypes.c_int), ("sample_steps", ctypes.c_int), ("width", ctypes.c_int), ("height", ctypes.c_int), ("seed", ctypes.c_int), ("sample_method", ctypes.c_char_p), + ("scheduler", ctypes.c_char_p), ("clip_skip", ctypes.c_int), ("vid_req_frames", ctypes.c_int), ("vid_req_avi", ctypes.c_int)] @@ -393,6 +396,8 @@ class embeddings_generation_outputs(ctypes.Structure): ("count", ctypes.c_int), ("data", ctypes.c_char_p)] + + def getdirpath(): return os.path.dirname(os.path.realpath(__file__)) def getabspath(): @@ -1788,9 +1793,58 @@ def sd_comfyui_tranform_params(genparams): print("Warning: ComfyUI Payload Missing!") return genparams +def sd_process_meta_fields(fields, config): + # aliases to match sd.cpp command-line options + aliases = { + 'cfg-scale': 'cfg_scale', + 'guidance': 'distilled_guidance', + 'sampler': 'sampler_name', + 'sampling-method': 'sampler_name', + 'timestep-shift': 'shifted_timestep', + } + fields_dict = {aliases.get(k, k): v for k, v in fields} + # whitelist accepted parameters + whitelist = ['scheduler', 'shifted_timestep', 'distilled_guidance'] 
+ if config: + # note the current UI always sets these + whitelist += ['sampler_name', 'cfg_scale'] + fields_dict = {k: v for k, v in fields_dict.items() if k in whitelist} + return fields_dict + +# json with top-level dict +def sd_parse_meta_field(prompt, config=False): + jfields = {} + try: + jfields = json.loads(prompt) + except json.JSONDecodeError: + # accept "field":"value",... without {} (also empty strings) + try: + jfields = json.loads('{ ' + prompt + ' }') + except json.JSONDecodeError: + print("Warning: couldn't parse meta prompt; it should be valid JSON.") + if not isinstance(jfields, dict): + jfields = {} + kv_dict = sd_process_meta_fields(jfields.items(), config) + return kv_dict + + def sd_generate(genparams): global maxctx, args, currentusergenkey, totalgens, pendingabortkey, chatcompl_adapter + sdgendefaults = sd_parse_meta_field(args.sdgendefaults or '', config=True) + params = dict() + defparams = dict() + for k, v in sdgendefaults.items(): + if k in ['sampler_name', 'scheduler']: + # these can be explicitly set to 'default'; process later + # TODO should we consider values like 'clip_skip=-1' as 'default' too?
+ defparams[k] = v + else: + params[k] = v + # apply most of the defaults + params.update(genparams) + genparams = params + default_adapter = {} if chatcompl_adapter is None else chatcompl_adapter adapter_obj = genparams.get('adapter', default_adapter) forced_negprompt = adapter_obj.get("add_sd_negative_prompt", "") @@ -1816,13 +1870,20 @@ def sd_generate(genparams): flip_mask = genparams.get("inpainting_mask_invert", 0) denoising_strength = tryparsefloat(genparams.get("denoising_strength", 0.6),0.6) cfg_scale = tryparsefloat(genparams.get("cfg_scale", 5),5) + distilled_guidance = tryparsefloat(genparams.get("distilled_guidance", None), None) + shifted_timestep = tryparseint(genparams.get("shifted_timestep", None), None) sample_steps = tryparseint(genparams.get("steps", 20),20) width = tryparseint(genparams.get("width", 512),512) height = tryparseint(genparams.get("height", 512),512) seed = tryparseint(genparams.get("seed", -1),-1) if seed < 0: seed = random.randint(100000, 999999) - sample_method = genparams.get("sampler_name", "k_euler_a") + sample_method = (genparams.get("sampler_name") or "default").lower() + if sample_method == 'default' and 'sampler_name' in defparams: + sample_method = (defparams.get("sampler_name") or "default").lower() + scheduler = (genparams.get("scheduler") or "default").lower() + if scheduler == 'default' and 'scheduler' in defparams: + scheduler = (defparams.get("scheduler") or "default").lower() clip_skip = tryparseint(genparams.get("clip_skip", -1),-1) vid_req_frames = tryparseint(genparams.get("frames", 1),1) vid_req_frames = 1 if (not vid_req_frames or vid_req_frames < 1) else vid_req_frames @@ -1834,6 +1895,10 @@ def sd_generate(genparams): #clean vars cfg_scale = (1 if cfg_scale < 1 else (25 if cfg_scale > 25 else cfg_scale)) + if distilled_guidance is not None and (distilled_guidance < 0 or distilled_guidance > 100): + distilled_guidance = None # fall back to the default + if shifted_timestep is not None and (shifted_timestep < 
0 or shifted_timestep > 1000): + shifted_timestep = None # fall back to the default sample_steps = (1 if sample_steps < 1 else (forced_steplimit if sample_steps > forced_steplimit else sample_steps)) vid_req_frames = (1 if vid_req_frames < 1 else (100 if vid_req_frames > 100 else vid_req_frames)) @@ -1852,12 +1917,17 @@ def sd_generate(genparams): inputs.extra_images[n] = extra_image.encode("UTF-8") inputs.flip_mask = flip_mask inputs.cfg_scale = cfg_scale + if distilled_guidance is not None: + inputs.distilled_guidance = distilled_guidance inputs.denoising_strength = denoising_strength + if shifted_timestep is not None: + inputs.shifted_timestep = shifted_timestep inputs.sample_steps = sample_steps inputs.width = width inputs.height = height inputs.seed = seed - inputs.sample_method = sample_method.lower().encode("UTF-8") + inputs.sample_method = sample_method.encode("UTF-8") + inputs.scheduler = scheduler.encode("UTF-8") inputs.clip_skip = clip_skip inputs.vid_req_frames = vid_req_frames inputs.vid_req_avi = vid_req_avi @@ -4675,6 +4745,7 @@ def hide_tooltip(event): sd_clamped_soft_var = ctk.StringVar(value="0") sd_threads_var = ctk.StringVar(value=str(default_threads)) sd_quant_var = ctk.StringVar(value=sd_quant_choices[0]) + sd_gen_defaults_var = ctk.StringVar() whisper_model_var = ctk.StringVar() tts_model_var = ctk.StringVar() @@ -5450,6 +5521,7 @@ def toggletaesd(a,b,c): makecheckbox(images_tab, "Model CPU Offload", sd_offload_cpu_var, 50,padx=8, tooltiptxt="Offload image weights in RAM to save VRAM, swap into VRAM when needed.") makecheckbox(images_tab, "VAE on CPU", sd_vae_cpu_var, 50,padx=160, tooltiptxt="Force VAE to CPU only for image generation.") makecheckbox(images_tab, "CLIP on GPU", sd_clip_gpu_var, 50,padx=280, tooltiptxt="Put CLIP and T5 to GPU for image generation. 
Otherwise, CLIP will use CPU.") + makelabelentry(images_tab, "Default Params:", sd_gen_defaults_var, 52, 280, padx=110, singleline=True, tooltip='Default image generation parameters when not specified by the UI or API.\nSpecified as JSON fields: {"KEY1":"VALUE1", "KEY2":"VALUE2"...}') # audio tab audio_tab = tabcontent["Audio"] @@ -5723,6 +5795,7 @@ def export_vars(): args.sdloramult = float(sd_loramult_var.get()) else: args.sdlora = "" + args.sdgendefaults = sd_gen_defaults_var.get() if whisper_model_var.get() != "": args.whispermodel = whisper_model_var.get() @@ -5949,6 +6022,7 @@ def import_vars(dict): sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "") sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0") + sd_gen_defaults_var.set(dict.get("sdgendefaults", "")) whisper_model_var.set(dict["whispermodel"] if ("whispermodel" in dict and dict["whispermodel"]) else "") @@ -7782,6 +7856,7 @@ def range_checker(arg: str): sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify an image generation LORA safetensors model to be applied.", default="") sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LORA model to be applied.", type=float, default=1.0) sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 
0 disables vae tiling.", type=int, default=default_vae_tile_threshold) + sdparsergroup.add_argument("--sdgendefaults", metavar=('{"parameter":"value",...}'), help="Sets default parameters for image generation, as a JSON string.", default="") whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands') whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="") diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 0da9d00c9f9..49d9b010f82 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -67,8 +67,11 @@ struct SDParams { int width = 512; int height = 512; - sample_method_t sample_method = EULER_A; + sample_method_t sample_method = SAMPLE_METHOD_DEFAULT; + scheduler_t scheduler = scheduler_t::DEFAULT; int sample_steps = 20; + float distilled_guidance = -1.0f; + int shifted_timestep = 0; float strength = 0.75f; int64_t seed = 42; bool clip_on_cpu = false; @@ -404,8 +407,8 @@ std::string clean_input_prompt(const std::string& input) { } static std::string get_image_params(const sd_img_gen_params_t & params) { - std::stringstream parameter_string; - parameter_string << std::setprecision(3) + std::stringstream ss; + ss << std::setprecision(3) + << "Prompt: " << params.prompt << " | NegativePrompt: " << params.negative_prompt << " | Steps: " << params.sample_params.sample_steps @@ -413,11 +416,15 @@ static std::string get_image_params(const sd_img_gen_params_t & params) { << " | Guidance: " << params.sample_params.guidance.distilled_guidance << " | Seed: " << params.seed << " | Size: " << params.width << "x" << params.height - << " | Sampler: " << sd_sample_method_name(params.sample_params.sample_method) - << " | Clip skip: " << params.clip_skip + << " | Sampler: " << sd_sample_method_name(params.sample_params.sample_method); + if (params.sample_params.scheduler != 
scheduler_t::DEFAULT) + ss << " " << sd_schedule_name(params.sample_params.scheduler); + if (params.sample_params.shifted_timestep != 0) + ss << " | Timestep Shift: " << params.sample_params.shifted_timestep; + ss << " | Clip skip: " << params.clip_skip << " | Model: " << sdmodelfilename << " | Version: KoboldCpp"; - return parameter_string.str(); + return ss.str(); } static inline int rounddown_64(int n) { @@ -519,23 +526,29 @@ static void sd_fix_resolution(int &width, int &height, int img_hard_limit, int i static enum sample_method_t sampler_from_name(const std::string& sampler) { - if(sampler=="euler a"||sampler=="k_euler_a"||sampler=="euler_a") //all lowercase + // all lowercase + enum sample_method_t result = str_to_sample_method(sampler.c_str()); + if (result != sample_method_t::SAMPLE_METHOD_COUNT) + { + return result; + } + else if(sampler=="euler a"||sampler=="k_euler_a") { return sample_method_t::EULER_A; } - else if(sampler=="euler"||sampler=="k_euler") + else if(sampler=="k_euler") { return sample_method_t::EULER; } - else if(sampler=="heun"||sampler=="k_heun") + else if(sampler=="k_heun") { return sample_method_t::HEUN; } - else if(sampler=="dpm2"||sampler=="k_dpm_2") + else if(sampler=="k_dpm_2") { return sample_method_t::DPM2; } - else if(sampler=="lcm"||sampler=="k_lcm") + else if(sampler=="k_lcm") { return sample_method_t::LCM; } @@ -549,11 +562,10 @@ static enum sample_method_t sampler_from_name(const std::string& sampler) } else { - return sample_method_t::EULER_A; + return sample_method_t::SAMPLE_METHOD_DEFAULT; } } - uint8_t* load_image_from_b64(const std::string & b64str, int& width, int& height, int expected_width = 0, int expected_height = 0, int expected_channel = 3) { std::vector decoded_buf = kcpp_base64_decode(b64str); @@ -644,6 +656,19 @@ uint8_t* load_image_from_b64(const std::string & b64str, int& width, int& height image_buffer = resized_image_buffer; } return image_buffer; + +} + +static enum scheduler_t scheduler_from_name(const 
char * scheduler) +{ + if (scheduler) { + enum scheduler_t result = str_to_schedule(scheduler); + if (result != scheduler_t::SCHEDULE_COUNT) + { + return result; + } + } + return scheduler_t::DEFAULT; } sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) @@ -674,13 +699,20 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) sd_params->prompt = cleanprompt; sd_params->negative_prompt = cleannegprompt; sd_params->cfg_scale = inputs.cfg_scale; + sd_params->distilled_guidance = inputs.distilled_guidance; sd_params->sample_steps = inputs.sample_steps; + sd_params->shifted_timestep = inputs.shifted_timestep; sd_params->seed = inputs.seed; sd_params->width = inputs.width; sd_params->height = inputs.height; sd_params->strength = inputs.denoising_strength; sd_params->clip_skip = inputs.clip_skip; sd_params->sample_method = sampler_from_name(inputs.sample_method); + sd_params->scheduler = scheduler_from_name(inputs.scheduler); + + if (sd_params->sample_method == SAMPLE_METHOD_DEFAULT) { + sd_params->sample_method = sd_get_default_sample_method(sd_ctx); + } auto loadedsdver = get_loaded_sd_version(sd_ctx); bool is_img2img = img2img_data != ""; @@ -841,10 +873,15 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) params.clip_skip = sd_params->clip_skip; params.sample_params.guidance.txt_cfg = sd_params->cfg_scale; params.sample_params.guidance.img_cfg = sd_params->cfg_scale; + if (sd_params->distilled_guidance >= 0.f) { + params.sample_params.guidance.distilled_guidance = sd_params->distilled_guidance; + } params.width = sd_params->width; params.height = sd_params->height; params.sample_params.sample_method = sd_params->sample_method; + params.sample_params.scheduler = sd_params->scheduler; params.sample_params.sample_steps = sd_params->sample_steps; + params.sample_params.shifted_timestep = sd_params->shifted_timestep; params.seed = sd_params->seed; params.strength = sd_params->strength; 
params.vae_tiling_params.enabled = dotile; @@ -922,6 +959,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) << "\nCFGSCLE:" << params.sample_params.guidance.txt_cfg << "\nSIZE:" << params.width << "x" << params.height << "\nSM:" << sd_sample_method_name(params.sample_params.sample_method) + << "\nSCHED:" << sd_schedule_name(params.sample_params.scheduler) << "\nSTEP:" << params.sample_params.sample_steps << "\nSEED:" << params.seed << "\nBATCH:" << params.batch_count