From 987cb5138d3b466dbd4ba399a7bd727ac293e7c6 Mon Sep 17 00:00:00 2001 From: Aedial Date: Tue, 7 Jan 2025 23:04:47 +0100 Subject: [PATCH] [API] Add image gen v4 preview and remove tests for image gen v1 --- example/generate_image_v4.py | 44 ++++++++ example/generate_text.py | 32 ++++-- novelai_api/ImagePreset.py | 100 +++++++++++++++++- novelai_api/Preset.py | 2 +- novelai_api/_high_level.py | 5 + .../image_presets/presets_v4/default.preset | 31 ++++++ tests/api/test_imagegen_samplers.py | 25 +++-- 7 files changed, 219 insertions(+), 20 deletions(-) create mode 100644 example/generate_image_v4.py create mode 100644 novelai_api/image_presets/presets_v4/default.preset diff --git a/example/generate_image_v4.py b/example/generate_image_v4.py new file mode 100644 index 0000000..ca9f24f --- /dev/null +++ b/example/generate_image_v4.py @@ -0,0 +1,44 @@ +""" +example/generate_image_v4.py +============================================================================== + +| Example of how to generate an image +| +| The resulting images will be placed in a folder named "results" +""" + +import asyncio +from pathlib import Path + +from example.boilerplate import API +from novelai_api.ImagePreset import ImageModel, ImagePreset, UCPreset + + +async def main(): + d = Path("results") + d.mkdir(exist_ok=True) + 
    async with API() as api_handler: + api = api_handler.api + + model = ImageModel.Anime_v4_preview + preset = ImagePreset.from_default_config(model) + preset.seed = 42 + preset.uc_preset = UCPreset.Preset_Heavy + preset.quality_toggle = False + + # even though we give positions, the model can ignore them + preset.characters = [ + # prompt, uc, position + {"prompt": "1girl", "position": "A3"}, + {"prompt": "1boy"}, # default position is "C3" + ] + + # "1girl, 1boy" + quality tags without "rating:general" + prompt = "1girl, 1boy, best quality, very aesthetic, absurdres" + async for _, img in api.high_level.generate_image(prompt, model, preset): + (d / "image_v4.png").write_bytes(img) + + +if 
__name__ == "__main__": + asyncio.run(main()) diff --git a/example/generate_text.py b/example/generate_text.py index b525ff8..b0e99ca 100644 --- a/example/generate_text.py +++ b/example/generate_text.py @@ -27,10 +27,14 @@ async def main(): # model = Model.Sigurd # model = Model.Euterpe # model = Model.Krake - model = Model.Clio + # model = Model.Clio + # model = Model.Kayra + model = Model.Erato # NOTE: plain text prompt prompt = PREAMBLE[model] + # NOTE: preamble should be the start. Look at the PREAMBLE variable in Preset.py for the correct preamble + # prompt = PREAMBLE[model] + "Suddenly," # NOTE: prompt encoded in tokens # prompt = Tokenizer.encode(model, PREAMBLE[model]) @@ -38,15 +42,18 @@ async def main(): preset = Preset("preset", model, {}) # NOTE: instantiation from default (presets/presets_6B_v4/default.txt) # preset = Preset.from_default(model) - # NOTE: instantiation from official file (presets/presets_6B_v4) + # NOTE: instantiation from official file (in presets/ folder) # preset = Preset.from_official(model, "Storywriter") - # NOTE: instantiation from file + # NOTE: instantiation from file (note that each preset is for a specific model) # preset = Preset.from_file("novelai_api/presets/presets_6B_v4/Storywriter.txt") # NOTE: instantiation of a new reset # preset = Preset("new preset", model) + # NOTE: modification of the preset preset.min_length = 1 - preset.max_length = 20 + # NOTE: context size is allowed_max_tokens - output_length - 20 (if generate_until_sentence is True) + # e.g. 
8192 - 50 - 20 = 8122 + preset.max_length = 50 # NOTE: instantiate with arguments global_settings = GlobalSettings(num_logprobs=GlobalSettings.NO_LOGPROBS) @@ -60,7 +67,7 @@ async def main(): # bad_words = BanList() # NOTE: ban list with elements in it # bad_words = BanList(" cat", " dog", " boy") - # NOTE: disabled ban list with elements in it + # NOTE: disabled ban list with elements in it (if you want to control it with a condition) # bad_words = BanList(" cat", " dog", " boy", enabled = False) # NOTE: add elements to the bias list if bad_words is not None: @@ -97,6 +104,11 @@ async def main(): # NOTE: stop sequence as tokens # stop_sequence = Tokenizer.encode(model, ["The End", "THE END", "\n"]) + # NOTE: for all models, but Erato + bytes_per_token = 2 + # NOTE: for Erato (because of Llama 3) + # bytes_per_token = 4 + # NOTE: normal generation gen = await api.high_level.generate( prompt, @@ -111,9 +123,9 @@ async def main(): # NOTE: b64-encoded list of tokens ids logger.info(gen["output"]) # NOTE: list of token ids - logger.info(b64_to_tokens(gen["output"])) + logger.info(b64_to_tokens(gen["output"], bytes_per_token)) # NOTE: decoded response - logger.info(Tokenizer.decode(model, b64_to_tokens(gen["output"]))) + logger.info(Tokenizer.decode(model, b64_to_tokens(gen["output"], bytes_per_token))) # NOTE: streamed generation async for token in api.high_level.generate_stream( @@ -131,9 +143,9 @@ async def main(): # NOTE: b64-encoded token id token["token"], # NOTE: token id - b64_to_tokens(token["token"]), - # NOTE: decoded token - Tokenizer.decode(model, b64_to_tokens(token["token"])), + b64_to_tokens(token["token"], bytes_per_token), + # NOTE: decoded token (do note that decoding single tokens can yield broken unicode characters) + Tokenizer.decode(model, b64_to_tokens(token["token"], bytes_per_token)), ) # ... 
and more examples can be found in tests/test_generate.py diff --git a/novelai_api/ImagePreset.py b/novelai_api/ImagePreset.py index eb0c61a..738bc21 100644 --- a/novelai_api/ImagePreset.py +++ b/novelai_api/ImagePreset.py @@ -33,6 +33,8 @@ class ImageModel(enum.Enum): Furry_v3 = "nai-diffusion-furry-3" Inpainting_Furry_v3 = "nai-diffusion-furry-3-inpainting" + Anime_v4_preview = "nai-diffusion-4-curated-preview" + class ControlNetModel(enum.Enum): """ @@ -93,6 +95,19 @@ class ImageResolution(enum.Enum): Large_Landscape_v3 = (1536, 1024) Large_Square_v3 = (1472, 1472) + # v4 + Small_Portrait_v4 = (512, 768) + Small_Landscape_v4 = (768, 512) + Small_Square_v4 = (640, 640) + + Normal_Portrait_v4 = (832, 1216) + Normal_Landscape_v4 = (1216, 832) + Normal_Square_v4 = (1024, 1024) + + Large_Portrait_v4 = (1024, 1536) + Large_Landscape_v4 = (1536, 1024) + Large_Square_v4 = (1472, 1472) + class ImageSampler(enum.Enum): """ @@ -199,6 +214,15 @@ class ImagePreset: "compression artifacts, unknown text", UCPreset.Preset_None: "lowres", }, + # v4 + ImageModel.Anime_v4_preview: { + UCPreset.Preset_Heavy: "blurry, lowres, error, film grain, scan artifacts, worst quality, bad quality, " + "jpeg artifacts, very displeasing, chromatic aberration, logo, dated, signature, multiple views, " + "gigantic breasts", + UCPreset.Preset_Light: "blurry, lowres, error, worst quality, bad quality, jpeg artifacts, " + "very displeasing, logo, dated, signature", + UCPreset.Preset_None: "lowres", + }, } # inpainting presets are the same as the normal ones @@ -221,6 +245,7 @@ class ImagePreset: } # type completion for __setitem__ and __getitem__ + #: https://docs.novelai.net/image/qualitytags.html quality_toggle: bool #: Automatically uses SMEA when image is above 1 megapixel @@ -281,13 +306,23 @@ class ImagePreset: reference_information_extracted_multiple: List[float] #: reference_strength for multi-vibe transfer reference_strength_multiple: List[float] - #: + #: 
https://blog.novelai.net/summer-sampler-update-en-3a34eb32b613 variety_plus: bool + #: Whether the AI should strictly follow the positions of the characters or have some freedom + use_coords: bool + + #: https://docs.novelai.net/image/multiplecharacters.html#multi-character-prompting + #: layout = {"prompt": ..., "uc": ..., "position": ... ("A1" to "E5", "C3" is default)} + characters: List[Dict[str, str]] #: Use the old behavior of prompt separation at the 75 tokens mark (can cut words in half) legacy_v3_extend: bool - #: ??? + #: Revision of the default arguments params_version: int + #: Use the old behavior of noise scheduling with the k_euler_ancestral sampler + deliberate_euler_ancestral_bug: bool + #: ??? + prefer_brownian: bool _settings: Dict[str, Any] @@ -360,6 +395,14 @@ def from_v3_furry_config(cls): return cls.from_file(Path(__file__).parent / "image_presets" / "presets_v3" / "default_furry.preset") + @classmethod + def from_v4_config(cls): + """ + Create a new ImagePreset with the default settings from the v4 config + """ + + return cls.from_file(Path(__file__).parent / "image_presets" / "presets_v4" / "default.preset") + @classmethod def from_default_config(cls, model: ImageModel) -> "ImagePreset": """ @@ -383,6 +426,8 @@ def from_default_config(cls, model: ImageModel) -> "ImagePreset": return cls.from_v3_config() elif model in (ImageModel.Furry_v3, ImageModel.Inpainting_Furry_v3): return cls.from_v3_furry_config() + elif model in (ImageModel.Anime_v4_preview,): + return cls.from_v4_config() def __setitem__(self, key: str, value: Any): if key not in self._TYPE_MAPPING: @@ -466,6 +511,7 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]: settings = copy.deepcopy(self._settings) + # size resolution: Union[ImageResolution, Tuple[int, int]] = settings.pop("resolution") if isinstance(resolution, ImageResolution): resolution: Tuple[int, int] = resolution.value @@ -480,6 +526,7 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]: 
settings["seed"] = seed settings["extra_noise_seed"] = seed + # UC uc_preset: Union[UCPreset, None] = settings.pop("uc_preset") if uc_preset is None: default_uc = "" @@ -492,6 +539,7 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]: combined_uc = f"{default_uc}, {uc}" if default_uc and uc else default_uc if default_uc else uc settings["negative_prompt"] = combined_uc + # sampler sampler: ImageSampler = settings.pop("sampler") if sampler is ImageSampler.ddim and model in (ImageModel.Anime_v3,): sampler = ImageSampler.ddim_v3 @@ -508,6 +556,54 @@ def to_settings(self, model: ImageModel) -> Dict[str, Any]: settings["dynamic_thresholding"] = settings.pop("decrisper") settings["skip_cfg_above_sigma"] = 19 if settings.pop("variety_plus", False) else None + # character prompts + if model in (ImageModel.Anime_v4_preview,): + settings["v4_prompt"] = { + # base_caption is set later, in generate_image + "caption": {"base_caption": None, "char_captions": []}, + "use_coords": self.use_coords, + "use_order": True, + } + settings["v4_negative_prompt"] = {"caption": {"base_caption": combined_uc, "char_captions": []}} + + characters = settings.pop("characters", []) + if not isinstance(characters, list): + raise ValueError("characters must be a list of dictionaries") + + settings["characterPrompts"] = [] + + for i, character in enumerate(characters): + if not isinstance(character, dict): + raise ValueError(f"character #{i} must be a dictionary") + + if "prompt" not in character: + raise ValueError(f"character #{i} must have at least a 'prompt' key") + + prompt = character["prompt"] + if not isinstance(prompt, str): + raise ValueError(f"character #{i} prompt must be a string") + + negative = character.get("uc", "") + + character_position = character.get("position", "") or "C3" + if ( + len(character_position) != 2 + or character_position[0] not in "ABCDE" + or character_position[1] not in "12345" + ): + raise ValueError(f'character #{i} position must be a valid position 
("", or "A1" to "E5")') + + pos = { + "x": round(0.5 + 0.2 * (ord(character_position[0]) - ord("C")), 1), + "y": round(0.5 + 0.2 * (ord(character_position[1]) - ord("3")), 1), + } + + settings["characterPrompts"].append({"center": pos, "prompt": prompt, "uc": negative}) + settings["v4_prompt"]["caption"]["char_captions"].append({"centers": [pos], "char_caption": prompt}) + settings["v4_negative_prompt"]["caption"]["char_captions"].append( + {"centers": [pos], "char_caption": negative} + ) + # special arguments kept for metadata purposes (no effect on result) settings["qualityToggle"] = settings.pop("quality_toggle") diff --git a/novelai_api/Preset.py b/novelai_api/Preset.py index 03c1d61..47742bf 100644 --- a/novelai_api/Preset.py +++ b/novelai_api/Preset.py @@ -246,7 +246,7 @@ class Preset(metaclass=_PresetMetaclass): stop_sequences: List[List[int]] #: https://naidb.miraheze.org/wiki/Generation_Settings#Randomness_(Temperature) temperature: float - #: Response length, if no interrupted by a Stop Sequence + #: Response length, if not interrupted by a Stop Sequence max_length: int #: Minimum number of token, if interrupted by a Stop Sequence min_length: int diff --git a/novelai_api/_high_level.py b/novelai_api/_high_level.py index 413a7e7..44df015 100644 --- a/novelai_api/_high_level.py +++ b/novelai_api/_high_level.py @@ -503,6 +503,11 @@ async def generate_image( ImageModel.Inpainting_Furry_v3, ): prompt = f"{prompt}, best quality, amazing quality, very aesthetic, absurdres" + elif model is ImageModel.Anime_v4_preview: + prompt = f"{prompt}, rating:general, best quality, very aesthetic, absurdres" + + if "v4_prompt" in settings: + settings["v4_prompt"]["caption"]["base_caption"] = prompt async for e in self._parent.low_level.generate_image(prompt, model, action, settings): yield e diff --git a/novelai_api/image_presets/presets_v4/default.preset b/novelai_api/image_presets/presets_v4/default.preset new file mode 100644 index 0000000..fd5426a --- /dev/null +++ 
b/novelai_api/image_presets/presets_v4/default.preset @@ -0,0 +1,31 @@ +{ + "resolution": "Normal_Portrait_v4", + "scale": 6, + "sampler": "k_euler_ancestral", + "steps": 23, + "n_samples": 1, + "strength": 0.7, + "noise": 0, + "uc_preset": "Preset_Heavy", + "quality_toggle": true, + "smea": false, + "smea_dyn": false, + "decrisper": false, + "controlnet_strength": 1, + "legacy": false, + "add_original_image": true, + "uncond_scale": 1, + "cfg_rescale": 0, + "noise_schedule": "karras", + "reference_image_multiple": [], + "reference_information_extracted_multiple": [], + "reference_strength_multiple": [], + "use_coords": true, + "legacy_v3_extend": false, + "deliberate_euler_ancestral_bug": false, + "prefer_brownian": true, + "params_version": 3, + + "seed": 0, + "uc": "" +} diff --git a/tests/api/test_imagegen_samplers.py b/tests/api/test_imagegen_samplers.py index a11d258..f282cb8 100644 --- a/tests/api/test_imagegen_samplers.py +++ b/tests/api/test_imagegen_samplers.py @@ -4,6 +4,7 @@ import asyncio import itertools +from pathlib import Path from typing import Tuple import pytest @@ -15,6 +16,13 @@ sampler_xfail = pytest.mark.xfail(strict=False, raises=NovelAIError, reason="The sampler might not work") models = list(ImageModel) + +# remove outdated models +models.remove(ImageModel.Anime_Full) +models.remove(ImageModel.Anime_Curated) +models.remove(ImageModel.Furry) + +# remove inpainting models models.remove(ImageModel.Inpainting_Anime_Full) models.remove(ImageModel.Inpainting_Anime_Curated) models.remove(ImageModel.Inpainting_Furry) @@ -25,11 +33,17 @@ model_samplers = list(itertools.product(models, samplers)) +test_results_dir = Path(__file__).parent.parent.parent / "test_results" + + @pytest.mark.parametrize( "model_sampler", [ pytest.param(e, marks=sampler_xfail) if e[1] in (ImageSampler.nai_smea, ImageSampler.plms, ImageSampler.k_dpm_adaptive) + or e == (ImageModel.Anime_v3, ImageSampler.k_heun) + or e == (ImageModel.Anime_v4_preview, ImageSampler.ddim) + or 
e == (ImageModel.Anime_v4_preview, ImageSampler.nai_smea_dyn) else e for e in model_samplers ], @@ -42,7 +56,7 @@ async def test_samplers( model, sampler = model_sampler # ddim_v3 only work with Anime v3 - if sampler is ImageSampler.ddim_v3 and model not in (ImageModel.Anime_v3,): + if sampler is ImageSampler.ddim_v3 and model not in (ImageModel.Anime_v3, ImageModel.Furry_v3): return logger = api_handle.logger @@ -52,12 +66,9 @@ async def test_samplers( preset["sampler"] = sampler preset.copy() - # Furry doesn't have UCPreset.Preset_Low_Quality_Bad_Anatomy - if model is ImageModel.Furry: - preset.uc_preset = UCPreset.Preset_Low_Quality - - async for _, _ in api.high_level.generate_image("1girl", model, preset): - pass + async for _, img in api.high_level.generate_image("1girl", model, preset): + if test_results_dir.exists(): + (test_results_dir / f"image_{model.name}_{sampler.name}.png").write_bytes(img) if __name__ == "__main__":