diff --git a/expose.h b/expose.h index 1ae4fad0692..b6511de09fd 100644 --- a/expose.h +++ b/expose.h @@ -190,7 +190,7 @@ struct sd_load_model_inputs const char * clip2_filename = nullptr; const char * vae_filename = nullptr; const char * lora_filenames[lora_filenames_max] = {}; - const float lora_multiplier = 1.0f; + const float lora_multiplier[lora_filenames_max] = {}; const int lora_apply_mode = 0; const char * photomaker_filename = nullptr; const char * upscaler_filename = nullptr; @@ -226,6 +226,7 @@ struct sd_generation_inputs const bool circular_x = false; const bool circular_y = false; const bool upscale = false; + const float lora_multipliers[lora_filenames_max] = {}; }; struct sd_generation_outputs { diff --git a/koboldcpp.py b/koboldcpp.py index 8b31b09e5b8..7960060a7a2 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -88,6 +88,7 @@ ttsmodelpath = "" #if empty, not initialized embeddingsmodelpath = "" #if empty, not initialized musicdiffusionmodelpath = "" #if empty, not initialized +imglorainfo = [] maxctx = 8192 maxhordectx = 0 #set to whatever maxctx is if 0 maxhordelen = 1024 @@ -320,7 +321,7 @@ class sd_load_model_inputs(ctypes.Structure): ("clip2_filename", ctypes.c_char_p), ("vae_filename", ctypes.c_char_p), ("lora_filenames", ctypes.c_char_p * lora_filenames_max), - ("lora_multiplier", ctypes.c_float), + ("lora_multipliers", ctypes.c_float * lora_filenames_max), ("lora_apply_mode", ctypes.c_int), ("photomaker_filename", ctypes.c_char_p), ("upscaler_filename", ctypes.c_char_p), @@ -354,7 +355,8 @@ class sd_generation_inputs(ctypes.Structure): ("remove_limits", ctypes.c_bool), ("circular_x", ctypes.c_bool), ("circular_y", ctypes.c_bool), - ("upscale", ctypes.c_bool)] + ("upscale", ctypes.c_bool), + ("lora_multipliers", ctypes.c_float * lora_filenames_max)] class sd_generation_outputs(ctypes.Structure): _fields_ = [("status", ctypes.c_int), @@ -1995,8 +1997,11 @@ def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip 
inputs.lora_filenames[n] = "".encode("UTF-8") else: inputs.lora_filenames[n] = lora_filenames[n].encode("UTF-8") + if not args.sdloramult or n >= len(args.sdloramult): + inputs.lora_multipliers[n] = 0. + else: + inputs.lora_multipliers[n] = args.sdloramult[n] - inputs.lora_multiplier = args.sdloramult inputs.t5xxl_filename = t5xxl_filename.encode("UTF-8") inputs.clip1_filename = clip1_filename.encode("UTF-8") inputs.clip2_filename = clip2_filename.encode("UTF-8") @@ -2199,6 +2204,16 @@ def sd_generate(genparams): inputs.circular_x = tryparseint(adapter_obj.get("circular_x", genparams.get("circular_x",0)),0) inputs.circular_y = tryparseint(adapter_obj.get("circular_y", genparams.get("circular_y",0)),0) inputs.upscale = (True if tryparseint(genparams.get("enable_hr", 0),0) else False) + + lora_multipliers = prepare_lora_multipliers(genparams.get("lora", [])) + for i in range(lora_filenames_max): + if i < len(lora_multipliers): + inputs.lora_multipliers[i] = lora_multipliers[i] + else: + inputs.lora_multipliers[i] = 0. 
+ print('lora multipliers Python:', lora_multipliers) + print('lora multipliers C:', [x for x in inputs.lora_multipliers]) + ret = handle.sd_generate(inputs) data_main = "" data_extra = "" @@ -4072,6 +4087,9 @@ def do_GET(self): elif clean_path.endswith('/v1/models') or clean_path=='/models': response_body = (json.dumps({"object":"list","data":[{"id":friendlymodelname,"object":"model","created":int(time.time()),"owned_by":"koboldcpp","permission":[],"root":"koboldcpp"}]}).encode()) + elif clean_path.endswith('/sdapi/v1/loras'): + response_body = (json.dumps([{'name': name, 'path': path} for _, name, path, multiplier in imglorainfo if multiplier == 0.])).encode() + elif clean_path.endswith('/sdapi/v1/upscalers'): if args.sdupscaler: response_body = (json.dumps([{"name":"ESRGAN_4x","model_name":"ESRGAN_4x","model_path":"upscaler_model.gguf","model_url":None,"scale":4}]).encode()) @@ -5068,6 +5086,12 @@ def do_POST(self): genparams = sd_comfyui_tranform_params(genparams) elif is_oai_imggen: genparams = sd_oai_tranform_params(genparams) + if not genparams.get('lora'): + # XXX hack to help testing with the stable-ui + prompt, loras = extract_loras_from_prompt(genparams['prompt']) + if loras: + genparams['prompt'] = prompt + genparams['lora'] = loras gen = sd_generate(genparams) gendat = gen["data"] genanim = gen["animated"] @@ -6897,9 +6921,10 @@ def export_vars(): args.sdquant = sd_quant_option(sd_quant_var.get()) if sd_lora_var.get() != "": args.sdlora = [item.strip() for item in sd_lora_var.get().split("|") if item] - args.sdloramult = float(sd_loramult_var.get()) + args.sdloramult = [float(item) for item in sd_loramult_var.get().split("|") if item] else: args.sdlora = None + args.sdloramult = None if gen_defaults_var.get() != "": args.gendefaults = gen_defaults_var.get() @@ -7158,7 +7183,13 @@ def import_vars(dict): sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "") else: sd_lora_var.set("") - sd_loramult_var.set(str(dict["sdloramult"]) 
if ("sdloramult" in dict and dict["sdloramult"]) else "1.0") + if "sdloramult" in dict and dict["sdloramult"]: + sdloramult = dict["sdloramult"] + if not isinstance(sdloramult, list): + sdloramult = [sdloramult] + sd_loramult_var.set("|".join([str(tryparsefloat(n, 0.)) for n in sdloramult])) + else: + sd_loramult_var.set("") gen_defaults_var.set(dict["gendefaults"] if ("gendefaults" in dict and dict["gendefaults"]) else "") gen_defaults_overwrite_var.set(1 if "gendefaultsoverwrite" in dict and dict["gendefaultsoverwrite"] else 0) @@ -7602,6 +7633,8 @@ def convert_invalid_args(args): dict["noflashattention"] = not dict["flashattention"] if "sdlora" in dict and isinstance(dict["sdlora"], str): dict["sdlora"] = ([dict["sdlora"]] if dict["sdlora"] else None) + if "sdloramult" in dict and isinstance(dict["sdloramult"], float): + dict["sdloramult"] = ([dict["sdloramult"]] if dict["sdloramult"] else None) return args def setuptunnel(global_memory, has_sd): @@ -8280,6 +8313,88 @@ def main(launch_args, default_args): print("Press ENTER key to exit.", flush=True) input() + +def mk_lora_info(imgloras, multipliers): + # (full path, name, name+extension, can change multiplier) + # XXX for each LoRA, sdapi needs a name and a path; we could use + # the full filename as a path, but we don't know if we can expose it + used_lora_names = set() + result = [] + for i, lora_path in enumerate(imgloras): + multiplier = 0. 
if not multipliers or i >= len(multipliers) else multipliers[i] + lora_file = os.path.basename(lora_path) + lora_name, lora_ext = os.path.splitext(lora_file) + # ensure unique names + i = 1 + mapped_name = lora_name + while True: + if mapped_name not in used_lora_names: + result.append((lora_path, mapped_name, mapped_name + lora_ext, multiplier)) + used_lora_names.add(mapped_name) + break + i += 1 + mapped_name = lora_name + '_' + str(i) + return result + +def extract_loras_from_prompt(prompt): + + pattern = r'<lora:([^>]+):([^>]+)>' + lora_data = [] + + matches = list(re.finditer(pattern, prompt)) + + for match in matches: + raw_path = match.group(1) + raw_mul = match.group(2) + try: + mul = float(raw_mul) + except ValueError: + continue + + is_high_noise = False + prefix = "|high_noise|" + if raw_path.startswith(prefix): + raw_path = raw_path[len(prefix):] + is_high_noise = True + + lora_data.append({ + 'name': raw_path, + 'multiplier': mul, + 'is_high_noise': is_high_noise, + }) + + prompt = prompt.replace(match.group(0), "", 1) + + return prompt, lora_data + + +def prepare_lora_multipliers(request_list): + result = [0.] * len(imglorainfo) + map_lora_path = {} + map_lora_name = {} + for i, (fullpath, name, path, origmul) in enumerate(imglorainfo): + map_lora_path[path] = i + map_lora_name[name] = i + if not isinstance(request_list, list): + request_list = [request_list] + for r in request_list: + if not isinstance(r, dict): + continue + multiplier = tryparsefloat(r.get('multiplier'), None) + if multiplier is None: + continue + idx = None + if 'path' in r: + idx = map_lora_path.get(r['path']) + elif 'name' in r: + # XXX this is convenient for the prompt, + # but is the API supposed to support it? 
+ idx = map_lora_name.get(r['name']) + if idx is not None: + result[idx] += multiplier + return result + + def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, embedded_kailite_gz, embedded_kcpp_docs_gz, embedded_kcpp_sdui_gz, embedded_lcpp_ui_gz, embedded_musicui, embedded_musicui_gz, start_time, exitcounter, global_memory, using_gui_launcher global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, musicdiffusionmodelpath, friendlyembeddingsmodelname, has_audio_support, has_vision_support, cached_chat_template @@ -8725,6 +8840,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): imgloras.append(os.path.abspath(curr)) else: print(f"Missing SD LORA model file {curr}...") + global imglorainfo + imglorainfo = mk_lora_info(imgloras, args.sdloramult) if args.sdvae: if os.path.exists(args.sdvae): imgvae = os.path.abspath(args.sdvae) @@ -9291,7 +9408,7 @@ def range_checker(arg: str): sdparsergrouplora = sdparsergroup.add_mutually_exclusive_group() sdparsergrouplora.add_argument("--sdquant", metavar=('[quantization level 0/1/2]'), help="If specified, loads the model quantized to save memory. 0=off, 1=q8, 2=q4", type=int, choices=[0,1,2], nargs="?", const=2, default=0) sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify image generation LoRAs safetensors models to be applied. 
Multiple LoRAs are accepted.", nargs='+') - sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LoRA model to be applied.", type=float, default=1.0) + sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the image LoRA model to be applied.", type=float, nargs='+', default=[1.0]) sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold) whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands') whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="") diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index a65811a6cab..914cdee3630 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -79,8 +79,8 @@ struct SDParams { bool chroma_use_dit_mask = true; std::vector<std::string> lora_paths; - std::vector<sd_lora_t> lora_specs; - uint32_t lora_count; + + float lora_multipliers[lora_filenames_max] = {}; }; //shared @@ -380,6 +380,17 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { params.chroma_use_dit_mask = false; } + // check initial LoRA multipliers + for(size_t i=0;i<sd_params->lora_paths.size();++i) + { + float multiplier = inputs.lora_multiplier[i]; + if (params.lora_apply_mode != LORA_APPLY_AT_RUNTIME && multiplier == 0.) 
{ + printf("\nForcing LoRA at_runtime mode to allow runtime customization\n"); + params.lora_apply_mode = LORA_APPLY_AT_RUNTIME; + } + sd_params->lora_multipliers[i] = multiplier; + } + if(inputs.debugmode==1) { char* buf = sd_ctx_params_to_str(&params); @@ -416,21 +427,20 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { std::filesystem::path mpath(inputs.model_filename); sdmodelfilename = mpath.filename().string(); - sd_params->lora_specs.clear(); - sd_params->lora_specs.reserve(lora_filenames_max*2); + // preload the LoRAs with the initial multipliers + std::vector<sd_lora_t> lora_specs; for(int i=0;i<sd_params->lora_paths.size();++i) { sd_lora_t spec = {}; spec.path = sd_params->lora_paths[i].c_str(); - spec.multiplier = inputs.lora_multiplier; - sd_params->lora_specs.push_back(spec); + spec.multiplier = sd_params->lora_multipliers[i]; + lora_specs.push_back(spec); } - if(sd_params->lora_specs.size()>0 && inputs.lora_multiplier>0) + if(lora_specs.size()>0) { - printf("\nApply %d LoRAs...\n",sd_params->lora_specs.size()); - sd_params->lora_count = sd_params->lora_specs.size(); - sd_ctx->sd->apply_loras(sd_params->lora_specs.data(), sd_params->lora_count); + printf("\nApply %d LoRAs...\n", (int)lora_specs.size()); + sd_ctx->sd->apply_loras(lora_specs.data(), lora_specs.size()); } input_extraimage_buffers.reserve(max_extra_images); @@ -1027,10 +1037,25 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) params.vae_tiling_params.enabled = dotile; params.batch_count = 1; - // needs to be "reapplied" because sdcpp tracks previously applied LoRAs - // and weights, and apply/unapply the differences at each gen - params.loras = sd_params->lora_specs.data(); - params.lora_count = sd_params->lora_count; + // prepare the LoRA multipliers - similar to the preload code, + // but omit if the multiplier is zero + std::vector<sd_lora_t> lora_specs; + for(size_t i=0;i<sd_params->lora_paths.size();++i) + { + float multiplier = sd_params->lora_multipliers[i] == 0.f + ? 
inputs.lora_multipliers[i] + : sd_params->lora_multipliers[i]; + if (multiplier != 0.f) { + sd_lora_t spec = {}; + spec.path = sd_params->lora_paths[i].c_str(); + spec.multiplier = multiplier; + lora_specs.push_back(spec); + } + } + // note sdcpp tracks previously applied LoRAs and weights, + // and apply/unapply the differences at each gen + params.loras = lora_specs.data(); + params.lora_count = lora_specs.size(); params.ref_images = reference_imgs.data(); params.ref_images_count = reference_imgs.size(); diff --git a/otherarch/sdcpp/stable-diffusion.cpp b/otherarch/sdcpp/stable-diffusion.cpp index 8176e1dca74..c9259669a0c 100644 --- a/otherarch/sdcpp/stable-diffusion.cpp +++ b/otherarch/sdcpp/stable-diffusion.cpp @@ -132,6 +132,7 @@ class StableDiffusionGGML { std::vector<std::shared_ptr<LoraModel>> diffusion_lora_models; std::vector<std::shared_ptr<LoraModel>> first_stage_lora_models; bool apply_lora_immediately = false; + std::map<std::string, std::shared_ptr<LoraModel>> kcpp_lora_cache; std::string taesd_path; bool use_tiny_autoencoder = false; @@ -1170,7 +1171,23 @@ class StableDiffusionGGML { std::shared_ptr<LoraModel> load_lora_model_from_file(const std::string& lora_id, float multiplier, ggml_backend_t backend, + std::string stage = "", LoraModel::filter_t lora_tensor_filter = nullptr) { + // kcpp + // first check the cache + bool kcpp_at_runtime = (stage != ""); + std::string lora_key = "|" + stage + "|" + lora_id; + if (kcpp_at_runtime) { + auto it = kcpp_lora_cache.find(lora_key); + if (it != kcpp_lora_cache.end()) { + if (it->second) { + it->second->multiplier = multiplier; + } + return it->second; + } + } + // by construction, kcpp will always find the preloaded LoRAs on the cache + std::string lora_path = lora_id; static std::string high_noise_tag = "|high_noise|"; bool is_high_noise = false; @@ -1182,10 +1199,16 @@ class StableDiffusionGGML { auto lora = std::make_shared<LoraModel>(lora_id, backend, lora_path, is_high_noise ? 
"model.high_noise_" : "", version); if (!lora->load_from_file(n_threads, lora_tensor_filter)) { LOG_WARN("load lora tensors from %s failed", lora_path.c_str()); - return nullptr; + // also cache negatives to avoid I/O at runtime + lora = nullptr; + if (kcpp_at_runtime) + kcpp_lora_cache[lora_key] = lora; + return lora; } lora->multiplier = multiplier; + if (kcpp_at_runtime) + kcpp_lora_cache[lora_key] = lora; return lora; } @@ -1264,7 +1287,7 @@ class StableDiffusionGGML { const std::string& lora_id = kv.first; float multiplier = kv.second; - auto lora = load_lora_model_from_file(lora_id, multiplier, clip_backend, lora_tensor_filter); + auto lora = load_lora_model_from_file(lora_id, multiplier, clip_backend, "cond_stage", lora_tensor_filter); if (lora && !lora->lora_tensors.empty()) { lora->preprocess_lora_tensors(tensors); cond_stage_lora_models.push_back(lora); @@ -1296,7 +1319,7 @@ class StableDiffusionGGML { const std::string& lora_name = kv.first; float multiplier = kv.second; - auto lora = load_lora_model_from_file(lora_name, multiplier, backend, lora_tensor_filter); + auto lora = load_lora_model_from_file(lora_name, multiplier, backend, "diffusion", lora_tensor_filter); if (lora && !lora->lora_tensors.empty()) { lora->preprocess_lora_tensors(tensors); diffusion_lora_models.push_back(lora); @@ -1332,7 +1355,7 @@ class StableDiffusionGGML { const std::string& lora_name = kv.first; float multiplier = kv.second; - auto lora = load_lora_model_from_file(lora_name, multiplier, vae_backend, lora_tensor_filter); + auto lora = load_lora_model_from_file(lora_name, multiplier, vae_backend, "first_stage", lora_tensor_filter); if (lora && !lora->lora_tensors.empty()) { lora->preprocess_lora_tensors(tensors); first_stage_lora_models.push_back(lora);